From 21a9c50bde3c4cd7bc652d79066a0ffbb5cc25fd Mon Sep 17 00:00:00 2001 From: Daniele Tricoli Date: Wed, 31 Dec 2014 12:43:02 +0000 Subject: New upstream release --- debian/changelog | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/debian/changelog b/debian/changelog index 2d8e46e..9d67ba3 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +python-urllib3 (1.10-1) UNRELEASED; urgency=medium + + * New upstream release. + + -- Daniele Tricoli Wed, 31 Dec 2014 13:42:51 +0100 + python-urllib3 (1.9.1-3) unstable; urgency=medium [ Stefano Rivera ] -- cgit v1.2.3 From 46d20441e9abdc646f5709c47f6a1dad3cee72fc Mon Sep 17 00:00:00 2001 From: Daniele Tricoli Date: Wed, 31 Dec 2014 17:40:10 +0000 Subject: Refresh 01_do-not-use-embedded-python-six.patch --- debian/changelog | 4 +++- debian/patches/01_do-not-use-embedded-python-six.patch | 13 +++++++------ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/debian/changelog b/debian/changelog index 9d67ba3..3b7043e 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,8 +1,10 @@ python-urllib3 (1.10-1) UNRELEASED; urgency=medium * New upstream release. + * debian/patches/01_do-not-use-embedded-python-six.patch + - Refresh. - -- Daniele Tricoli Wed, 31 Dec 2014 13:42:51 +0100 + -- Daniele Tricoli Wed, 31 Dec 2014 18:24:49 +0100 python-urllib3 (1.9.1-3) unstable; urgency=medium diff --git a/debian/patches/01_do-not-use-embedded-python-six.patch b/debian/patches/01_do-not-use-embedded-python-six.patch index a7a0716..62a5a51 100644 --- a/debian/patches/01_do-not-use-embedded-python-six.patch +++ b/debian/patches/01_do-not-use-embedded-python-six.patch @@ -1,7 +1,7 @@ Description: Do not use embedded copy of python-six. 
Author: Daniele Tricoli Forwarded: not-needed -Last-Update: 2014-09-23 +Last-Update: 2014-12-31 --- a/test/test_collections.py +++ b/test/test_collections.py @@ -99,8 +99,8 @@ Last-Update: 2014-09-23 from collections import OrderedDict except ImportError: from .packages.ordered_dict import OrderedDict --from .packages.six import itervalues -+from six import itervalues +-from .packages.six import iterkeys, itervalues ++from six import iterkeys, itervalues __all__ = ['RecentlyUsedContainer', 'HTTPHeaderDict'] @@ -127,9 +127,9 @@ Last-Update: 2014-09-23 --- a/urllib3/util/retry.py +++ b/urllib3/util/retry.py -@@ -7,7 +7,7 @@ +@@ -8,7 +8,7 @@ ReadTimeoutError, - MaxRetryError, + ResponseError, ) -from ..packages import six +import six @@ -138,9 +138,10 @@ Last-Update: 2014-09-23 log = logging.getLogger(__name__) --- a/test/test_retry.py +++ b/test/test_retry.py -@@ -1,6 +1,6 @@ +@@ -1,7 +1,7 @@ import unittest + from urllib3.response import HTTPResponse -from urllib3.packages.six.moves import xrange +from six.moves import xrange from urllib3.util.retry import Retry -- cgit v1.2.3 From 855b284926916486682eadeb1d077a7456a92ef9 Mon Sep 17 00:00:00 2001 From: Daniele Tricoli Date: Thu, 15 Jan 2015 21:59:15 +0000 Subject: Remove 06_do-not-make-SSLv3-mandatory.patch since it was merged upstream --- debian/changelog | 4 +++- .../patches/06_do-not-make-SSLv3-mandatory.patch | 25 ---------------------- debian/patches/series | 1 - 3 files changed, 3 insertions(+), 27 deletions(-) delete mode 100644 debian/patches/06_do-not-make-SSLv3-mandatory.patch diff --git a/debian/changelog b/debian/changelog index 3b7043e..b26b20b 100644 --- a/debian/changelog +++ b/debian/changelog @@ -3,8 +3,10 @@ python-urllib3 (1.10-1) UNRELEASED; urgency=medium * New upstream release. * debian/patches/01_do-not-use-embedded-python-six.patch - Refresh. + * debian/patches/06_do-not-make-SSLv3-mandatory.patch + - Remove since it was merged upstream. 
- -- Daniele Tricoli Wed, 31 Dec 2014 18:24:49 +0100 + -- Daniele Tricoli Thu, 15 Jan 2015 22:58:53 +0100 python-urllib3 (1.9.1-3) unstable; urgency=medium diff --git a/debian/patches/06_do-not-make-SSLv3-mandatory.patch b/debian/patches/06_do-not-make-SSLv3-mandatory.patch deleted file mode 100644 index c072d60..0000000 --- a/debian/patches/06_do-not-make-SSLv3-mandatory.patch +++ /dev/null @@ -1,25 +0,0 @@ -Description: Since SSL version 3 is insecure it is supported only if Python - supports it. In Debian SSL version 3 is disabled in system Python since - 2.7.8-12. -Author: Daniele Tricoli -Forwarded: https://github.com/shazow/urllib3/issues/487#issuecomment-63805742 -Last/Update: 2014-11-20 - ---- a/urllib3/contrib/pyopenssl.py -+++ b/urllib3/contrib/pyopenssl.py -@@ -70,9 +70,14 @@ - # Map from urllib3 to PyOpenSSL compatible parameter-values. - _openssl_versions = { - ssl.PROTOCOL_SSLv23: OpenSSL.SSL.SSLv23_METHOD, -- ssl.PROTOCOL_SSLv3: OpenSSL.SSL.SSLv3_METHOD, - ssl.PROTOCOL_TLSv1: OpenSSL.SSL.TLSv1_METHOD, - } -+ -+try: -+ _openssl_versions.update({ssl.PROTOCOL_SSLv3: OpenSSL.SSL.SSLv3_METHOD}) -+except AttributeError: -+ pass -+ - _openssl_verify = { - ssl.CERT_NONE: OpenSSL.SSL.VERIFY_NONE, - ssl.CERT_OPTIONAL: OpenSSL.SSL.VERIFY_PEER, diff --git a/debian/patches/series b/debian/patches/series index 30602ad..b77d657 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -3,4 +3,3 @@ 03_force_setuptools.patch 04_relax_nosetests_options.patch 05_avoid-embedded-ssl-match-hostname.patch -06_do-not-make-SSLv3-mandatory.patch -- cgit v1.2.3 From 5c16023d92731d837dc87905a586067d720bef61 Mon Sep 17 00:00:00 2001 From: Piotr Ożarowski Date: Sat, 17 Jan 2015 11:48:18 +0000 Subject: s/UNRELEASED/experimental/ --- debian/changelog | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debian/changelog b/debian/changelog index b26b20b..06d041a 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,4 +1,4 @@ -python-urllib3 (1.10-1) 
UNRELEASED; urgency=medium +python-urllib3 (1.10-1) experimental; urgency=medium * New upstream release. * debian/patches/01_do-not-use-embedded-python-six.patch -- cgit v1.2.3 From 0c183b9d52b45bac22a2ff9db0e6348b655f4ab2 Mon Sep 17 00:00:00 2001 From: SVN-Git Migration Date: Thu, 8 Oct 2015 13:19:30 -0700 Subject: Imported Upstream version 1.2.2 --- CHANGES.rst | 146 +++++ CONTRIBUTORS.txt | 43 ++ LICENSE.txt | 19 + MANIFEST.in | 1 + PKG-INFO | 265 +++++++++ README.rst | 97 ++++ dummyserver/__init__.py | 0 dummyserver/handlers.py | 159 ++++++ dummyserver/server.py | 113 ++++ dummyserver/testcase.py | 71 +++ setup.cfg | 10 + setup.py | 53 ++ test-requirements.txt | 2 + test/__init__.py | 0 test/benchmark.py | 77 +++ test/test_collections.py | 111 ++++ test/test_connectionpool.py | 136 +++++ test/test_poolmanager.py | 47 ++ test/test_response.py | 68 +++ urllib3.egg-info/PKG-INFO | 265 +++++++++ urllib3.egg-info/SOURCES.txt | 36 ++ urllib3.egg-info/dependency_links.txt | 1 + urllib3.egg-info/top_level.txt | 2 + urllib3/__init__.py | 48 ++ urllib3/_collections.py | 131 +++++ urllib3/connectionpool.py | 629 +++++++++++++++++++++ urllib3/contrib/__init__.py | 0 urllib3/contrib/ntlmpool.py | 120 ++++ urllib3/exceptions.py | 67 +++ urllib3/filepost.py | 74 +++ urllib3/packages/__init__.py | 4 + .../packages/mimetools_choose_boundary/__init__.py | 47 ++ urllib3/packages/six.py | 372 ++++++++++++ urllib3/packages/ssl_match_hostname/__init__.py | 61 ++ urllib3/poolmanager.py | 138 +++++ urllib3/request.py | 147 +++++ urllib3/response.py | 191 +++++++ 37 files changed, 3751 insertions(+) create mode 100644 CHANGES.rst create mode 100644 CONTRIBUTORS.txt create mode 100644 LICENSE.txt create mode 100644 MANIFEST.in create mode 100644 PKG-INFO create mode 100644 README.rst create mode 100644 dummyserver/__init__.py create mode 100644 dummyserver/handlers.py create mode 100755 dummyserver/server.py create mode 100644 dummyserver/testcase.py create mode 100644 setup.cfg create 
mode 100644 setup.py create mode 100644 test-requirements.txt create mode 100644 test/__init__.py create mode 100644 test/benchmark.py create mode 100644 test/test_collections.py create mode 100644 test/test_connectionpool.py create mode 100644 test/test_poolmanager.py create mode 100644 test/test_response.py create mode 100644 urllib3.egg-info/PKG-INFO create mode 100644 urllib3.egg-info/SOURCES.txt create mode 100644 urllib3.egg-info/dependency_links.txt create mode 100644 urllib3.egg-info/top_level.txt create mode 100644 urllib3/__init__.py create mode 100644 urllib3/_collections.py create mode 100644 urllib3/connectionpool.py create mode 100644 urllib3/contrib/__init__.py create mode 100644 urllib3/contrib/ntlmpool.py create mode 100644 urllib3/exceptions.py create mode 100644 urllib3/filepost.py create mode 100644 urllib3/packages/__init__.py create mode 100644 urllib3/packages/mimetools_choose_boundary/__init__.py create mode 100644 urllib3/packages/six.py create mode 100644 urllib3/packages/ssl_match_hostname/__init__.py create mode 100644 urllib3/poolmanager.py create mode 100644 urllib3/request.py create mode 100644 urllib3/response.py diff --git a/CHANGES.rst b/CHANGES.rst new file mode 100644 index 0000000..d998db8 --- /dev/null +++ b/CHANGES.rst @@ -0,0 +1,146 @@ +Changes +======= + + +1.2.2 (2012-02-06) +++++++++++++++++++ + +* Fixed packaging bug of not shipping ``test-requirements.txt``. (Issue #47) + + +1.2.1 (2012-02-05) +++++++++++++++++++ + +* Fixed another bug related to when ``ssl`` module is not available. (Issue #41) + +* Location parsing errors now raise ``urllib3.exceptions.LocationParseError`` + which inherits from ``ValueError``. + + +1.2 (2012-01-29) +++++++++++++++++ + +* Added Python 3 support (tested on 3.2.2) + +* Dropped Python 2.5 support (tested on 2.6.7, 2.7.2) + +* Use ``select.poll`` instead of ``select.select`` for platforms that support + it. 
+ +* Use ``Queue.LifoQueue`` instead of ``Queue.Queue`` for more aggressive + connection reusing. Configurable by overriding ``ConnectionPool.QueueCls``. + +* Fixed ``ImportError`` during install when ``ssl`` module is not available. + (Issue #41) + +* Fixed ``PoolManager`` redirects between schemes (such as HTTP -> HTTPS) not + completing properly. (Issue #28, uncovered by Issue #10 in v1.1) + +* Ported ``dummyserver`` to use ``tornado`` instead of ``webob`` + + ``eventlet``. Removed extraneous unsupported dummyserver testing backends. + Added socket-level tests. + +* More tests. Achievement Unlocked: 99% Coverage. + + +1.1 (2012-01-07) +++++++++++++++++ + +* Refactored ``dummyserver`` to its own root namespace module (used for + testing). + +* Added hostname verification for ``VerifiedHTTPSConnection`` by vendoring in + Py32's ``ssl_match_hostname``. (Issue #25) + +* Fixed cross-host HTTP redirects when using ``PoolManager``. (Issue #10) + +* Fixed ``decode_content`` being ignored when set through ``urlopen``. (Issue + #27) + +* Fixed timeout-related bugs. (Issues #17, #23) + + +1.0.2 (2011-11-04) +++++++++++++++++++ + +* Fixed typo in ``VerifiedHTTPSConnection`` which would only present as a bug if + you're using the object manually. (Thanks pyos) + +* Made RecentlyUsedContainer (and consequently PoolManager) more thread-safe by + wrapping the access log in a mutex. (Thanks @christer) + +* Made RecentlyUsedContainer more dict-like (corrected ``__delitem__`` and + ``__getitem__`` behaviour), with tests. Shouldn't affect core urllib3 code. + + +1.0.1 (2011-10-10) +++++++++++++++++++ + +* Fixed a bug where the same connection would get returned into the pool twice, + causing extraneous "HttpConnectionPool is full" log warnings. + + +1.0 (2011-10-08) +++++++++++++++++ + +* Added ``PoolManager`` with LRU expiration of connections (tested and + documented). +* Added ``ProxyManager`` (needs tests, docs, and confirmation that it works + with HTTPS proxies). 
+* Added optional partial-read support for responses when + ``preload_content=False``. You can now make requests and just read the headers + without loading the content. +* Made response decoding optional (default on, same as before). +* Added optional explicit boundary string for ``encode_multipart_formdata``. +* Convenience request methods are now inherited from ``RequestMethods``. Old + helpers like ``get_url`` and ``post_url`` should be abandoned in favour of + the new ``request(method, url, ...)``. +* Refactored code to be even more decoupled, reusable, and extendable. +* License header added to ``.py`` files. +* Embiggened the documentation: Lots of Sphinx-friendly docstrings in the code + and docs in ``docs/`` and on urllib3.readthedocs.org. +* Embettered all the things! +* Started writing this file. + + +0.4.1 (2011-07-17) +++++++++++++++++++ + +* Minor bug fixes, code cleanup. + + +0.4 (2011-03-01) +++++++++++++++++ + +* Better unicode support. +* Added ``VerifiedHTTPSConnection``. +* Added ``NTLMConnectionPool`` in contrib. +* Minor improvements. + + +0.3.1 (2010-07-13) +++++++++++++++++++ + +* Added ``assert_host_name`` optional parameter. Now compatible with proxies. + + +0.3 (2009-12-10) +++++++++++++++++ + +* Added HTTPS support. +* Minor bug fixes. +* Refactored, broken backwards compatibility with 0.2. +* API to be treated as stable from this version forward. + + +0.2 (2008-11-17) +++++++++++++++++ + +* Added unit tests. +* Bug fixes. + + +0.1 (2008-11-16) +++++++++++++++++ + +* First release. 
diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt new file mode 100644 index 0000000..37140ca --- /dev/null +++ b/CONTRIBUTORS.txt @@ -0,0 +1,43 @@ +# Contributions to the urllib3 project + +## Creator & Maintainer + +* Andrey Petrov + + +## Contributors + +In chronological order: + +* victor.vde + * HTTPS patch (which inspired HTTPSConnectionPool) + +* erikcederstrand + * NTLM-authenticated HTTPSConnectionPool + * Basic-authenticated HTTPSConnectionPool (merged into make_headers) + +* niphlod + * Client-verified SSL certificates for HTTPSConnectionPool + * Response gzip and deflate encoding support + * Better unicode support for filepost using StringIO buffers + +* btoconnor + * Non-multipart encoding for POST requests + +* p.dobrogost + * Code review, PEP8 compliance, benchmark fix + +* kennethreitz + * Bugfixes, suggestions, Requests integration + +* georgemarshall + * Bugfixes, Improvements and Test coverage + +* Thomas Kluyver + * Python 3 support + +* brandon-rhodes + * Design review, bugfixes, test coverage. + +* [Your name or handle] <[email or website]> + * [Brief summary of your changes] diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 0000000..f658ad6 --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,19 @@ +This is the MIT license: http://www.opensource.org/licenses/mit-license.php + +Copyright 2008-2011 Andrey Petrov and contributors (see CONTRIBUTORS.txt) + +Permission is hereby granted, free of charge, to any person obtaining a copy of this +software and associated documentation files (the "Software"), to deal in the Software +without restriction, including without limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or +substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE +FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..d1abae2 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1 @@ +include README.rst CHANGES.rst LICENSE.txt CONTRIBUTORS.txt test-requirements.txt diff --git a/PKG-INFO b/PKG-INFO new file mode 100644 index 0000000..2f0ab55 --- /dev/null +++ b/PKG-INFO @@ -0,0 +1,265 @@ +Metadata-Version: 1.0 +Name: urllib3 +Version: 1.2.2 +Summary: HTTP library with thread-safe connection pooling, file post, and more. +Home-page: http://urllib3.readthedocs.org/ +Author: Andrey Petrov +Author-email: andrey.petrov@shazow.net +License: MIT +Description: Highlights + ========== + + - Re-use the same socket connection for multiple requests + (``HTTPConnectionPool`` and ``HTTPSConnectionPool``) + (with optional client-side certificate verification). + - File posting (``encode_multipart_formdata``). + - Built-in redirection and retries (optional). + - Supports gzip and deflate decoding. + - Thread-safe and sanity-safe. + - Tested on Python 2.6+ and Python 3.2+, 99% unit test coverage. + - Small and easy to understand codebase perfect for extending and building upon. + For a more comprehensive solution, have a look at + `Requests `_ which is also powered by urllib3. + + What's wrong with urllib and urllib2? + ===================================== + + There are two critical features missing from the Python standard library: + Connection re-using/pooling and file posting. 
It's not terribly hard to + implement these yourself, but it's much easier to use a module that already + did the work for you. + + The Python standard libraries ``urllib`` and ``urllib2`` have little to do + with each other. They were designed to be independent and standalone, each + solving a different scope of problems, and ``urllib3`` follows in a similar + vein. + + Why do I want to reuse connections? + =================================== + + Performance. When you normally do a urllib call, a separate socket + connection is created with each request. By reusing existing sockets + (supported since HTTP 1.1), the requests will take up less resources on the + server's end, and also provide a faster response time at the client's end. + With some simple benchmarks (see `test/benchmark.py + `_ + ), downloading 15 URLs from google.com is about twice as fast when using + HTTPConnectionPool (which uses 1 connection) than using plain urllib (which + uses 15 connections). + + This library is perfect for: + + - Talking to an API + - Crawling a website + - Any situation where being able to post files, handle redirection, and + retrying is useful. It's relatively lightweight, so it can be used for + anything! + + Examples + ======== + + Go to `urllib3.readthedocs.org `_ + for more nice syntax-highlighted examples. + + But, long story short:: + + import urllib3 + + http = urllib3.PoolManager() + + r = http.request('GET', 'http://google.com/') + + print r.status, r.data + + The ``PoolManager`` will take care of reusing connections for you whenever + you request the same host. For more fine-grained control of your connection + pools, you should look at + `ConnectionPool `_. + + + Run the tests + ============= + + We use some external dependencies to run the urllib3 test suite. Easiest way to + run the tests is thusly from the urllib3 source root: :: + + $ pip install -r test-requirements.txt + $ nosetests + ..................................................... + + Success! 
You could also ``pip install coverage`` to get code coverage reporting. + + + Contributing + ============ + + #. `Check for open issues `_ or open + a fresh issue to start a discussion around a feature idea or a bug. There is + a *Contributor Friendly* tag for issues that should be ideal for people who + are not very familiar with the codebase yet. + #. Fork the `urllib3 repository on Github `_ + to start making your changes. + #. Write a test which shows that the bug was fixed or that the feature works + as expected. + #. Send a pull request and bug the maintainer until it gets merged and published. + :) Make sure to add yourself to ``CONTRIBUTORS.txt``. + + + Changes + ======= + + + 1.2.2 (2012-02-06) + ++++++++++++++++++ + + * Fixed packaging bug of not shipping ``test-requirements.txt``. (Issue #47) + + + 1.2.1 (2012-02-05) + ++++++++++++++++++ + + * Fixed another bug related to when ``ssl`` module is not available. (Issue #41) + + * Location parsing errors now raise ``urllib3.exceptions.LocationParseError`` + which inherits from ``ValueError``. + + + 1.2 (2012-01-29) + ++++++++++++++++ + + * Added Python 3 support (tested on 3.2.2) + + * Dropped Python 2.5 support (tested on 2.6.7, 2.7.2) + + * Use ``select.poll`` instead of ``select.select`` for platforms that support + it. + + * Use ``Queue.LifoQueue`` instead of ``Queue.Queue`` for more aggressive + connection reusing. Configurable by overriding ``ConnectionPool.QueueCls``. + + * Fixed ``ImportError`` during install when ``ssl`` module is not available. + (Issue #41) + + * Fixed ``PoolManager`` redirects between schemes (such as HTTP -> HTTPS) not + completing properly. (Issue #28, uncovered by Issue #10 in v1.1) + + * Ported ``dummyserver`` to use ``tornado`` instead of ``webob`` + + ``eventlet``. Removed extraneous unsupported dummyserver testing backends. + Added socket-level tests. + + * More tests. Achievement Unlocked: 99% Coverage. 
+ + + 1.1 (2012-01-07) + ++++++++++++++++ + + * Refactored ``dummyserver`` to its own root namespace module (used for + testing). + + * Added hostname verification for ``VerifiedHTTPSConnection`` by vendoring in + Py32's ``ssl_match_hostname``. (Issue #25) + + * Fixed cross-host HTTP redirects when using ``PoolManager``. (Issue #10) + + * Fixed ``decode_content`` being ignored when set through ``urlopen``. (Issue + #27) + + * Fixed timeout-related bugs. (Issues #17, #23) + + + 1.0.2 (2011-11-04) + ++++++++++++++++++ + + * Fixed typo in ``VerifiedHTTPSConnection`` which would only present as a bug if + you're using the object manually. (Thanks pyos) + + * Made RecentlyUsedContainer (and consequently PoolManager) more thread-safe by + wrapping the access log in a mutex. (Thanks @christer) + + * Made RecentlyUsedContainer more dict-like (corrected ``__delitem__`` and + ``__getitem__`` behaviour), with tests. Shouldn't affect core urllib3 code. + + + 1.0.1 (2011-10-10) + ++++++++++++++++++ + + * Fixed a bug where the same connection would get returned into the pool twice, + causing extraneous "HttpConnectionPool is full" log warnings. + + + 1.0 (2011-10-08) + ++++++++++++++++ + + * Added ``PoolManager`` with LRU expiration of connections (tested and + documented). + * Added ``ProxyManager`` (needs tests, docs, and confirmation that it works + with HTTPS proxies). + * Added optional partial-read support for responses when + ``preload_content=False``. You can now make requests and just read the headers + without loading the content. + * Made response decoding optional (default on, same as before). + * Added optional explicit boundary string for ``encode_multipart_formdata``. + * Convenience request methods are now inherited from ``RequestMethods``. Old + helpers like ``get_url`` and ``post_url`` should be abandoned in favour of + the new ``request(method, url, ...)``. + * Refactored code to be even more decoupled, reusable, and extendable. 
+ * License header added to ``.py`` files. + * Embiggened the documentation: Lots of Sphinx-friendly docstrings in the code + and docs in ``docs/`` and on urllib3.readthedocs.org. + * Embettered all the things! + * Started writing this file. + + + 0.4.1 (2011-07-17) + ++++++++++++++++++ + + * Minor bug fixes, code cleanup. + + + 0.4 (2011-03-01) + ++++++++++++++++ + + * Better unicode support. + * Added ``VerifiedHTTPSConnection``. + * Added ``NTLMConnectionPool`` in contrib. + * Minor improvements. + + + 0.3.1 (2010-07-13) + ++++++++++++++++++ + + * Added ``assert_host_name`` optional parameter. Now compatible with proxies. + + + 0.3 (2009-12-10) + ++++++++++++++++ + + * Added HTTPS support. + * Minor bug fixes. + * Refactored, broken backwards compatibility with 0.2. + * API to be treated as stable from this version forward. + + + 0.2 (2008-11-17) + ++++++++++++++++ + + * Added unit tests. + * Bug fixes. + + + 0.1 (2008-11-16) + ++++++++++++++++ + + * First release. + +Keywords: urllib httplib threadsafe filepost http https ssl pooling +Platform: UNKNOWN +Classifier: Environment :: Web Environment +Classifier: Intended Audience :: Developers +Classifier: License :: OSI Approved :: MIT License +Classifier: Operating System :: OS Independent +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 2 +Classifier: Programming Language :: Python :: 3 +Classifier: Topic :: Internet :: WWW/HTTP +Classifier: Topic :: Software Development :: Libraries diff --git a/README.rst b/README.rst new file mode 100644 index 0000000..b376c81 --- /dev/null +++ b/README.rst @@ -0,0 +1,97 @@ +Highlights +========== + +- Re-use the same socket connection for multiple requests + (``HTTPConnectionPool`` and ``HTTPSConnectionPool``) + (with optional client-side certificate verification). +- File posting (``encode_multipart_formdata``). +- Built-in redirection and retries (optional). +- Supports gzip and deflate decoding. +- Thread-safe and sanity-safe. 
+- Tested on Python 2.6+ and Python 3.2+, 99% unit test coverage. +- Small and easy to understand codebase perfect for extending and building upon. + For a more comprehensive solution, have a look at + `Requests `_ which is also powered by urllib3. + +What's wrong with urllib and urllib2? +===================================== + +There are two critical features missing from the Python standard library: +Connection re-using/pooling and file posting. It's not terribly hard to +implement these yourself, but it's much easier to use a module that already +did the work for you. + +The Python standard libraries ``urllib`` and ``urllib2`` have little to do +with each other. They were designed to be independent and standalone, each +solving a different scope of problems, and ``urllib3`` follows in a similar +vein. + +Why do I want to reuse connections? +=================================== + +Performance. When you normally do a urllib call, a separate socket +connection is created with each request. By reusing existing sockets +(supported since HTTP 1.1), the requests will take up less resources on the +server's end, and also provide a faster response time at the client's end. +With some simple benchmarks (see `test/benchmark.py +`_ +), downloading 15 URLs from google.com is about twice as fast when using +HTTPConnectionPool (which uses 1 connection) than using plain urllib (which +uses 15 connections). + +This library is perfect for: + +- Talking to an API +- Crawling a website +- Any situation where being able to post files, handle redirection, and + retrying is useful. It's relatively lightweight, so it can be used for + anything! + +Examples +======== + +Go to `urllib3.readthedocs.org `_ +for more nice syntax-highlighted examples. 
+ +But, long story short:: + + import urllib3 + + http = urllib3.PoolManager() + + r = http.request('GET', 'http://google.com/') + + print r.status, r.data + +The ``PoolManager`` will take care of reusing connections for you whenever +you request the same host. For more fine-grained control of your connection +pools, you should look at +`ConnectionPool `_. + + +Run the tests +============= + +We use some external dependencies to run the urllib3 test suite. Easiest way to +run the tests is thusly from the urllib3 source root: :: + + $ pip install -r test-requirements.txt + $ nosetests + ..................................................... + +Success! You could also ``pip install coverage`` to get code coverage reporting. + + +Contributing +============ + +#. `Check for open issues `_ or open + a fresh issue to start a discussion around a feature idea or a bug. There is + a *Contributor Friendly* tag for issues that should be ideal for people who + are not very familiar with the codebase yet. +#. Fork the `urllib3 repository on Github `_ + to start making your changes. +#. Write a test which shows that the bug was fixed or that the feature works + as expected. +#. Send a pull request and bug the maintainer until it gets merged and published. + :) Make sure to add yourself to ``CONTRIBUTORS.txt``. 
diff --git a/dummyserver/__init__.py b/dummyserver/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/dummyserver/handlers.py b/dummyserver/handlers.py new file mode 100644 index 0000000..3e32881 --- /dev/null +++ b/dummyserver/handlers.py @@ -0,0 +1,159 @@ +from __future__ import print_function + +import gzip +import logging +import sys +import time +import zlib + +from io import BytesIO +from tornado.wsgi import HTTPRequest + +try: + from urllib.parse import urlsplit +except ImportError: + from urlparse import urlsplit + +log = logging.getLogger(__name__) + + +class Response(object): + def __init__(self, body='', status='200 OK', headers=None): + if not isinstance(body, bytes): + body = body.encode('utf8') + + self.body = body + self.status = status + self.headers = headers or [("Content-type", "text/plain")] + + def __call__(self, environ, start_response): + start_response(self.status, self.headers) + return [self.body] + + +class WSGIHandler(object): + pass + + +class TestingApp(WSGIHandler): + """ + Simple app that performs various operations, useful for testing an HTTP + library. + + Given any path, it will attempt to convert it will load a corresponding + local method if it exists. Status code 200 indicates success, 400 indicates + failure. Each method has its own conditions for success/failure. + """ + def __call__(self, environ, start_response): + req = HTTPRequest(environ) + + req.params = {} + for k, v in req.arguments.items(): + req.params[k] = next(iter(v)) + + path = req.path[:] + if not path.startswith('/'): + path = urlsplit(path).path + + target = path[1:].replace('/', '_') + method = getattr(self, target, self.index) + resp = method(req) + + if dict(resp.headers).get('Connection') == 'close': + # FIXME: Can we kill the connection somehow? 
+ pass + + return resp(environ, start_response) + + def index(self, _request): + "Render simple message" + return Response("Dummy server!") + + def set_up(self, request): + test_type = request.params.get('test_type') + test_id = request.params.get('test_id') + if test_id: + print('\nNew test %s: %s' % (test_type, test_id)) + else: + print('\nNew test %s' % test_type) + return Response("Dummy server is ready!") + + def specific_method(self, request): + "Confirm that the request matches the desired method type" + method = request.params.get('method') + if method and not isinstance(method, str): + method = method.decode('utf8') + + if request.method != method: + return Response("Wrong method: %s != %s" % + (method, request.method), status='400') + return Response() + + def upload(self, request): + "Confirm that the uploaded file conforms to specification" + # FIXME: This is a huge broken mess + param = request.params.get('upload_param', 'myfile').decode('ascii') + filename = request.params.get('upload_filename', '').decode('utf-8') + size = int(request.params.get('upload_size', '0')) + files_ = request.files.get(param) + + if len(files_) != 1: + return Response("Expected 1 file for '%s', not %d" %(param, len(files_)), + status='400') + file_ = files_[0] + + data = file_['body'] + if int(size) != len(data): + return Response("Wrong size: %d != %d" % + (size, len(data)), status='400') + + if filename != file_['filename']: + return Response("Wrong filename: %s != %s" % + (filename, file_.filename), status='400') + + return Response() + + def redirect(self, request): + "Perform a redirect to ``target``" + target = request.params.get('target', '/') + headers = [('Location', target)] + return Response(status='303', headers=headers) + + def keepalive(self, request): + if request.params.get('close', '0') == '1': + headers = [('Connection', 'close')] + return Response('Closing', headers=headers) + + headers = [('Connection', 'keep-alive')] + return Response('Keeping alive', 
headers=headers) + + def sleep(self, request): + "Sleep for a specified amount of ``seconds``" + seconds = float(request.params.get('seconds', '1')) + time.sleep(seconds) + return Response() + + def echo(self, request): + "Echo back the params" + if request.method == 'GET': + return Response(request.query) + + return Response(request.body) + + def encodingrequest(self, request): + "Check for UA accepting gzip/deflate encoding" + data = b"hello, world!" + encoding = request.headers.get('Accept-Encoding', '') + headers = None + if 'gzip' in encoding: + headers = [('Content-Encoding', 'gzip')] + file_ = BytesIO() + gzip.GzipFile('', mode='w', fileobj=file_).write(data) + data = file_.getvalue() + elif 'deflate' in encoding: + headers = [('Content-Encoding', 'deflate')] + data = zlib.compress(data) + return Response(data, headers=headers) + + def shutdown(self, request): + sys.exit() diff --git a/dummyserver/server.py b/dummyserver/server.py new file mode 100755 index 0000000..529850f --- /dev/null +++ b/dummyserver/server.py @@ -0,0 +1,113 @@ +#!/usr/bin/env python + +""" +Dummy server used for unit testing. +""" +from __future__ import print_function + +import logging +import os +import sys +import threading +import socket + +import tornado.wsgi +import tornado.httpserver +import tornado.ioloop + +from dummyserver.handlers import TestingApp + + +log = logging.getLogger(__name__) + +CERTS_PATH = os.path.join(os.path.dirname(__file__), 'certs') +DEFAULT_CERTS = { + 'certfile': os.path.join(CERTS_PATH, 'server.crt'), + 'keyfile': os.path.join(CERTS_PATH, 'server.key'), +} +DEFAULT_CA = os.path.join(CERTS_PATH, 'cacert.pem') +DEFAULT_CA_BAD = os.path.join(CERTS_PATH, 'client_bad.pem') + + +# Different types of servers we have: + + +class SocketServerThread(threading.Thread): + """ + :param socket_handler: Callable which receives a socket argument for one + request. + :param ready_lock: Lock which gets released when the socket handler is + ready to receive requests. 
+ """ + def __init__(self, socket_handler, host='localhost', port=8081, + ready_lock=None): + threading.Thread.__init__(self) + + self.socket_handler = socket_handler + self.host = host + self.port = port + self.ready_lock = ready_lock + + def _start_server(self): + sock = socket.socket() + sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + sock.bind((self.host, self.port)) + + # Once listen() returns, the server socket is ready + sock.listen(1) + + if self.ready_lock: + self.ready_lock.release() + + self.socket_handler(sock) + + def run(self): + self.server = self._start_server() + + +class TornadoServerThread(threading.Thread): + def __init__(self, host='localhost', port=8081, scheme='http', certs=None): + threading.Thread.__init__(self) + + self.host = host + self.port = port + self.scheme = scheme + self.certs = certs + + def _start_server(self): + container = tornado.wsgi.WSGIContainer(TestingApp()) + + if self.scheme == 'https': + http_server = tornado.httpserver.HTTPServer(container, + ssl_options=self.certs) + else: + http_server = tornado.httpserver.HTTPServer(container) + + http_server.listen(self.port) + return http_server + + def run(self): + self.server = self._start_server() + self.ioloop = tornado.ioloop.IOLoop.instance() + self.ioloop.start() + + def stop(self): + self.server.stop() + self.ioloop.stop() + + +if __name__ == '__main__': + log.setLevel(logging.DEBUG) + log.addHandler(logging.StreamHandler(sys.stderr)) + + from urllib3 import get_host + + url = "http://localhost:8081" + if len(sys.argv) > 1: + url = sys.argv[1] + + print("Starting WGI server at: %s" % url) + + scheme, host, port = get_host(url) + t = TornadoServerThread(scheme=scheme, host=host, port=port) + t.start() diff --git a/dummyserver/testcase.py b/dummyserver/testcase.py new file mode 100644 index 0000000..518d739 --- /dev/null +++ b/dummyserver/testcase.py @@ -0,0 +1,71 @@ +import unittest + +from threading import Lock + +from dummyserver.server import ( + 
TornadoServerThread, SocketServerThread, + DEFAULT_CERTS, +) + + +# TODO: Change ports to auto-allocated? + + +class SocketDummyServerTestCase(unittest.TestCase): + """ + A simple socket-based server is created for this class that is good for + exactly one request. + """ + scheme = 'http' + host = 'localhost' + port = 18080 + + @classmethod + def _start_server(cls, socket_handler): + ready_lock = Lock() + ready_lock.acquire() + cls.server_thread = SocketServerThread(socket_handler=socket_handler, + ready_lock=ready_lock, + host=cls.host, port=cls.port) + cls.server_thread.start() + + # Lock gets released by thread above + ready_lock.acquire() + + +class HTTPDummyServerTestCase(unittest.TestCase): + scheme = 'http' + host = 'localhost' + host_alt = '127.0.0.1' # Some tests need two hosts + port = 18081 + certs = DEFAULT_CERTS + + @classmethod + def _start_server(cls): + cls.server_thread = TornadoServerThread(host=cls.host, port=cls.port, + scheme=cls.scheme, + certs=cls.certs) + cls.server_thread.start() + + # TODO: Loop-check here instead + import time + time.sleep(0.1) + + @classmethod + def _stop_server(cls): + cls.server_thread.stop() + + @classmethod + def setUpClass(cls): + cls._start_server() + + @classmethod + def tearDownClass(cls): + cls._stop_server() + + +class HTTPSDummyServerTestCase(HTTPDummyServerTestCase): + scheme = 'https' + host = 'localhost' + port = 18082 + certs = DEFAULT_CERTS diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..58ce3f5 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,10 @@ +[nosetests] +logging-clear-handlers = true +with-coverage = true +cover-package = urllib3 + +[egg_info] +tag_build = +tag_date = 0 +tag_svn_revision = 0 + diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..f093f34 --- /dev/null +++ b/setup.py @@ -0,0 +1,53 @@ +#!/usr/bin/env python + +from distutils.core import setup + +import os +import re + +try: + import setuptools +except ImportError: + pass # No 'develop' command, oh 
well. + +base_path = os.path.dirname(__file__) + +# Get the version (borrowed from SQLAlchemy) +fp = open(os.path.join(base_path, 'urllib3', '__init__.py')) +VERSION = re.compile(r".*__version__ = '(.*?)'", + re.S).match(fp.read()).group(1) +fp.close() + + +version = VERSION + +requirements = [] +tests_requirements = requirements + open('test-requirements.txt').readlines() + +setup(name='urllib3', + version=version, + description="HTTP library with thread-safe connection pooling, file post, and more.", + long_description=open('README.rst').read() + '\n\n' + open('CHANGES.rst').read(), + classifiers=[ + 'Environment :: Web Environment', + 'Intended Audience :: Developers', + 'License :: OSI Approved :: MIT License', + 'Operating System :: OS Independent', + 'Programming Language :: Python', + 'Programming Language :: Python :: 2', + 'Programming Language :: Python :: 3', + 'Topic :: Internet :: WWW/HTTP', + 'Topic :: Software Development :: Libraries', + ], + keywords='urllib httplib threadsafe filepost http https ssl pooling', + author='Andrey Petrov', + author_email='andrey.petrov@shazow.net', + url='http://urllib3.readthedocs.org/', + license='MIT', + packages=['urllib3', 'dummyserver', 'urllib3.packages', + 'urllib3.packages.ssl_match_hostname', 'urllib3.packages.mimetools_choose_boundary', + ], + requires=requirements, + tests_require=tests_requirements, + test_suite='test', + ) diff --git a/test-requirements.txt b/test-requirements.txt new file mode 100644 index 0000000..568b0d4 --- /dev/null +++ b/test-requirements.txt @@ -0,0 +1,2 @@ +nose +tornado diff --git a/test/__init__.py b/test/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test/benchmark.py b/test/benchmark.py new file mode 100644 index 0000000..e7049c4 --- /dev/null +++ b/test/benchmark.py @@ -0,0 +1,77 @@ +#!/usr/bin/env python + +""" +Really simple rudimentary benchmark to compare ConnectionPool versus standard +urllib to demonstrate the usefulness of connection re-using. 
+""" +from __future__ import print_function + +import sys +import time +import urllib + +sys.path.append('../') +import urllib3 + + +# URLs to download. Doesn't matter as long as they're from the same host, so we +# can take advantage of connection re-using. +TO_DOWNLOAD = [ + 'http://code.google.com/apis/apps/', + 'http://code.google.com/apis/base/', + 'http://code.google.com/apis/blogger/', + 'http://code.google.com/apis/calendar/', + 'http://code.google.com/apis/codesearch/', + 'http://code.google.com/apis/contact/', + 'http://code.google.com/apis/books/', + 'http://code.google.com/apis/documents/', + 'http://code.google.com/apis/finance/', + 'http://code.google.com/apis/health/', + 'http://code.google.com/apis/notebook/', + 'http://code.google.com/apis/picasaweb/', + 'http://code.google.com/apis/spreadsheets/', + 'http://code.google.com/apis/webmastertools/', + 'http://code.google.com/apis/youtube/', +] + + +def urllib_get(url_list): + assert url_list + for url in url_list: + now = time.time() + r = urllib.urlopen(url) + elapsed = time.time() - now + print("Got in %0.3f: %s" % (elapsed, url)) + + +def pool_get(url_list): + assert url_list + pool = urllib3.connection_from_url(url_list[0]) + for url in url_list: + now = time.time() + r = pool.get_url(url) + elapsed = time.time() - now + print("Got in %0.3fs: %s" % (elapsed, url)) + + +if __name__ == '__main__': + print("Running pool_get ...") + now = time.time() + pool_get(TO_DOWNLOAD) + pool_elapsed = time.time() - now + + print("Running urllib_get ...") + now = time.time() + urllib_get(TO_DOWNLOAD) + urllib_elapsed = time.time() - now + + print("Completed pool_get in %0.3fs" % pool_elapsed) + print("Completed urllib_get in %0.3fs" % urllib_elapsed) + + +""" +Example results: + +Completed pool_get in 1.163s +Completed urllib_get in 2.318s +""" diff --git a/test/test_collections.py b/test/test_collections.py new file mode 100644 index 0000000..f8275e0 --- /dev/null +++ b/test/test_collections.py @@ -0,0 +1,111 @@ 
+import unittest + +from urllib3._collections import RecentlyUsedContainer as Container +from urllib3.packages import six +xrange = six.moves.xrange + +class TestLRUContainer(unittest.TestCase): + def test_maxsize(self): + d = Container(5) + + for i in xrange(5): + d[i] = str(i) + + self.assertEqual(len(d), 5) + + for i in xrange(5): + self.assertEqual(d[i], str(i)) + + d[i+1] = str(i+1) + + self.assertEqual(len(d), 5) + self.assertFalse(0 in d) + self.assertTrue(i+1 in d) + + def test_expire(self): + d = Container(5) + + for i in xrange(5): + d[i] = str(i) + + for i in xrange(5): + d.get(0) + + # Add one more entry + d[5] = '5' + + # Check state + self.assertEqual(list(d.keys()), [0, 2, 3, 4, 5]) + + def test_pruning(self): + d = Container(5) + + for i in xrange(5): + d[i] = str(i) + + # Contend 2 entries for the most-used slot to balloon the heap + for i in xrange(100): + d.get(i % 2) + + self.assertTrue(len(d.access_log) <= d.CLEANUP_FACTOR * d._maxsize) + + def test_same_key(self): + d = Container(5) + + for i in xrange(10): + d['foo'] = i + + self.assertEqual(list(d.keys()), ['foo']) + + d._prune_invalidated_entries() + + self.assertEqual(len(d.access_log), 1) + + def test_access_ordering(self): + d = Container(5) + + for i in xrange(10): + d[i] = True + + self.assertEqual(d._get_ordered_access_keys(), [9,8,7,6,5]) + + new_order = [7,8,6,9,5] + for k in reversed(new_order): + d[k] + + self.assertEqual(d._get_ordered_access_keys(), new_order) + + def test_delete(self): + d = Container(5) + + for i in xrange(5): + d[i] = True + + del d[0] + self.assertFalse(0 in d) + + d.pop(1) + self.assertFalse(1 in d) + + d.pop(1, None) + + def test_get(self): + d = Container(5) + + for i in xrange(5): + d[i] = True + + r = d.get(4) + self.assertEqual(r, True) + + r = d.get(5) + self.assertEqual(r, None) + + r = d.get(5, 42) + self.assertEqual(r, 42) + + self.assertRaises(KeyError, lambda: d[5]) + + +if __name__ == '__main__': + unittest.main() diff --git 
a/test/test_connectionpool.py b/test/test_connectionpool.py new file mode 100644 index 0000000..4281d42 --- /dev/null +++ b/test/test_connectionpool.py @@ -0,0 +1,136 @@ +import unittest + +from urllib3.connectionpool import ( + connection_from_url, + get_host, + HTTPConnectionPool, + make_headers) + +from urllib3.exceptions import EmptyPoolError, LocationParseError + + +class TestConnectionPool(unittest.TestCase): + def test_get_host(self): + url_host_map = { + 'http://google.com/mail': ('http', 'google.com', None), + 'http://google.com/mail/': ('http', 'google.com', None), + 'google.com/mail': ('http', 'google.com', None), + 'http://google.com/': ('http', 'google.com', None), + 'http://google.com': ('http', 'google.com', None), + 'http://www.google.com': ('http', 'www.google.com', None), + 'http://mail.google.com': ('http', 'mail.google.com', None), + 'http://google.com:8000/mail/': ('http', 'google.com', 8000), + 'http://google.com:8000': ('http', 'google.com', 8000), + 'https://google.com': ('https', 'google.com', None), + 'https://google.com:8000': ('https', 'google.com', 8000), + 'http://user:password@127.0.0.1:1234': ('http', '127.0.0.1', 1234), + } + for url, expected_host in url_host_map.items(): + returned_host = get_host(url) + self.assertEquals(returned_host, expected_host) + + def test_same_host(self): + same_host = [ + ('http://google.com/', '/'), + ('http://google.com/', 'http://google.com/'), + ('http://google.com/', 'http://google.com'), + ('http://google.com/', 'http://google.com/abra/cadabra'), + ('http://google.com:42/', 'http://google.com:42/abracadabra'), + ] + + for a, b in same_host: + c = connection_from_url(a) + self.assertTrue(c.is_same_host(b), "%s =? 
%s" % (a, b)) + + not_same_host = [ + ('https://google.com/', 'http://google.com/'), + ('http://google.com/', 'https://google.com/'), + ('http://yahoo.com/', 'http://google.com/'), + ('http://google.com:42', 'https://google.com/abracadabra'), + ('http://google.com', 'https://google.net/'), + ] + + for a, b in not_same_host: + c = connection_from_url(a) + self.assertFalse(c.is_same_host(b), "%s =? %s" % (a, b)) + + def test_invalid_host(self): + # TODO: Add more tests + invalid_host = [ + 'http://google.com:foo', + ] + + for location in invalid_host: + self.assertRaises(LocationParseError, get_host, location) + + + def test_make_headers(self): + self.assertEqual( + make_headers(accept_encoding=True), + {'accept-encoding': 'gzip,deflate'}) + + self.assertEqual( + make_headers(accept_encoding='foo,bar'), + {'accept-encoding': 'foo,bar'}) + + self.assertEqual( + make_headers(accept_encoding=['foo', 'bar']), + {'accept-encoding': 'foo,bar'}) + + self.assertEqual( + make_headers(accept_encoding=True, user_agent='banana'), + {'accept-encoding': 'gzip,deflate', 'user-agent': 'banana'}) + + self.assertEqual( + make_headers(user_agent='banana'), + {'user-agent': 'banana'}) + + self.assertEqual( + make_headers(keep_alive=True), + {'connection': 'keep-alive'}) + + self.assertEqual( + make_headers(basic_auth='foo:bar'), + {'authorization': 'Basic Zm9vOmJhcg=='}) + + def test_max_connections(self): + pool = HTTPConnectionPool(host='localhost', maxsize=1, block=True) + + pool._get_conn(timeout=0.01) + + try: + pool._get_conn(timeout=0.01) + self.fail("Managed to get a connection without EmptyPoolError") + except EmptyPoolError: + pass + + try: + pool.get_url('/', pool_timeout=0.01) + self.fail("Managed to get a connection without EmptyPoolError") + except EmptyPoolError: + pass + + self.assertEqual(pool.num_connections, 1) + + def test_pool_edgecases(self): + pool = HTTPConnectionPool(host='localhost', maxsize=1, block=False) + + conn1 = pool._get_conn() + conn2 = 
pool._get_conn() # New because block=False + + pool._put_conn(conn1) + pool._put_conn(conn2) # Should be discarded + + self.assertEqual(conn1, pool._get_conn()) + self.assertNotEqual(conn2, pool._get_conn()) + + self.assertEqual(pool.num_connections, 3) + + def test_exception_str(self): + self.assertEqual( + str(EmptyPoolError(HTTPConnectionPool(host='localhost'), "Test.")), + "HTTPConnectionPool(host='localhost', port=None): Test.") + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_poolmanager.py b/test/test_poolmanager.py new file mode 100644 index 0000000..12722f7 --- /dev/null +++ b/test/test_poolmanager.py @@ -0,0 +1,47 @@ +import unittest + +from urllib3.poolmanager import PoolManager +from urllib3 import connection_from_url + + +class TestPoolManager(unittest.TestCase): + def test_same_url(self): + # Convince ourselves that normally we don't get the same object + conn1 = connection_from_url('http://localhost:8081/foo') + conn2 = connection_from_url('http://localhost:8081/bar') + + self.assertNotEqual(conn1, conn2) + + # Now try again using the PoolManager + p = PoolManager(1) + + conn1 = p.connection_from_url('http://localhost:8081/foo') + conn2 = p.connection_from_url('http://localhost:8081/bar') + + self.assertEqual(conn1, conn2) + + def test_many_urls(self): + urls = [ + "http://localhost:8081/foo", + "http://www.google.com/mail", + "http://localhost:8081/bar", + "https://www.google.com/", + "https://www.google.com/mail", + "http://yahoo.com", + "http://bing.com", + "http://yahoo.com/", + ] + + connections = set() + + p = PoolManager(10) + + for url in urls: + conn = p.connection_from_url(url) + connections.add(conn) + + self.assertEqual(len(connections), 5) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_response.py b/test/test_response.py new file mode 100644 index 0000000..0ef379c --- /dev/null +++ b/test/test_response.py @@ -0,0 +1,68 @@ +import unittest +import zlib + +from io import BytesIO + +from 
urllib3.response import HTTPResponse + +class TestLegacyResponse(unittest.TestCase): + def test_getheaders(self): + headers = {'host': 'example.com'} + r = HTTPResponse(headers=headers) + self.assertEqual(r.getheaders(), headers) + + def test_getheader(self): + headers = {'host': 'example.com'} + r = HTTPResponse(headers=headers) + self.assertEqual(r.getheader('host'), 'example.com') + + +class TestResponse(unittest.TestCase): + def test_cache_content(self): + r = HTTPResponse('foo') + self.assertEqual(r.data, 'foo') + self.assertEqual(r._body, 'foo') + + def test_default(self): + r = HTTPResponse() + self.assertEqual(r.data, None) + + def test_none(self): + r = HTTPResponse(None) + self.assertEqual(r.data, None) + + def test_preload(self): + fp = BytesIO(b'foo') + + r = HTTPResponse(fp, preload_content=True) + + self.assertEqual(fp.tell(), len(b'foo')) + self.assertEqual(r.data, b'foo') + + def test_no_preload(self): + fp = BytesIO(b'foo') + + r = HTTPResponse(fp, preload_content=False) + + self.assertEqual(fp.tell(), 0) + self.assertEqual(r.data, b'foo') + self.assertEqual(fp.tell(), len(b'foo')) + + def test_decode_bad_data(self): + fp = BytesIO(b'\x00' * 10) + self.assertRaises(zlib.error, HTTPResponse, fp, headers={ + 'content-encoding': 'deflate' + }) + + def test_decode_deflate(self): + import zlib + data = zlib.compress(b'foo') + + fp = BytesIO(data) + r = HTTPResponse(fp, headers={'content-encoding': 'deflate'}) + + self.assertEqual(r.data, b'foo') + + +if __name__ == '__main__': + unittest.main() diff --git a/urllib3.egg-info/PKG-INFO b/urllib3.egg-info/PKG-INFO new file mode 100644 index 0000000..2f0ab55 --- /dev/null +++ b/urllib3.egg-info/PKG-INFO @@ -0,0 +1,265 @@ +Metadata-Version: 1.0 +Name: urllib3 +Version: 1.2.2 +Summary: HTTP library with thread-safe connection pooling, file post, and more. 
+Home-page: http://urllib3.readthedocs.org/ +Author: Andrey Petrov +Author-email: andrey.petrov@shazow.net +License: MIT +Description: Highlights + ========== + + - Re-use the same socket connection for multiple requests + (``HTTPConnectionPool`` and ``HTTPSConnectionPool``) + (with optional client-side certificate verification). + - File posting (``encode_multipart_formdata``). + - Built-in redirection and retries (optional). + - Supports gzip and deflate decoding. + - Thread-safe and sanity-safe. + - Tested on Python 2.6+ and Python 3.2+, 99% unit test coverage. + - Small and easy to understand codebase perfect for extending and building upon. + For a more comprehensive solution, have a look at + `Requests `_ which is also powered by urllib3. + + What's wrong with urllib and urllib2? + ===================================== + + There are two critical features missing from the Python standard library: + Connection re-using/pooling and file posting. It's not terribly hard to + implement these yourself, but it's much easier to use a module that already + did the work for you. + + The Python standard libraries ``urllib`` and ``urllib2`` have little to do + with each other. They were designed to be independent and standalone, each + solving a different scope of problems, and ``urllib3`` follows in a similar + vein. + + Why do I want to reuse connections? + =================================== + + Performance. When you normally do a urllib call, a separate socket + connection is created with each request. By reusing existing sockets + (supported since HTTP 1.1), the requests will take up less resources on the + server's end, and also provide a faster response time at the client's end. + With some simple benchmarks (see `test/benchmark.py + `_ + ), downloading 15 URLs from google.com is about twice as fast when using + HTTPConnectionPool (which uses 1 connection) than using plain urllib (which + uses 15 connections). 
+ + This library is perfect for: + + - Talking to an API + - Crawling a website + - Any situation where being able to post files, handle redirection, and + retrying is useful. It's relatively lightweight, so it can be used for + anything! + + Examples + ======== + + Go to `urllib3.readthedocs.org `_ + for more nice syntax-highlighted examples. + + But, long story short:: + + import urllib3 + + http = urllib3.PoolManager() + + r = http.request('GET', 'http://google.com/') + + print r.status, r.data + + The ``PoolManager`` will take care of reusing connections for you whenever + you request the same host. For more fine-grained control of your connection + pools, you should look at + `ConnectionPool `_. + + + Run the tests + ============= + + We use some external dependencies to run the urllib3 test suite. Easiest way to + run the tests is thusly from the urllib3 source root: :: + + $ pip install -r test-requirements.txt + $ nosetests + ..................................................... + + Success! You could also ``pip install coverage`` to get code coverage reporting. + + + Contributing + ============ + + #. `Check for open issues `_ or open + a fresh issue to start a discussion around a feature idea or a bug. There is + a *Contributor Friendly* tag for issues that should be ideal for people who + are not very familiar with the codebase yet. + #. Fork the `urllib3 repository on Github `_ + to start making your changes. + #. Write a test which shows that the bug was fixed or that the feature works + as expected. + #. Send a pull request and bug the maintainer until it gets merged and published. + :) Make sure to add yourself to ``CONTRIBUTORS.txt``. + + + Changes + ======= + + + 1.2.2 (2012-02-06) + ++++++++++++++++++ + + * Fixed packaging bug of not shipping ``test-requirements.txt``. (Issue #47) + + + 1.2.1 (2012-02-05) + ++++++++++++++++++ + + * Fixed another bug related to when ``ssl`` module is not available. 
(Issue #41) + + * Location parsing errors now raise ``urllib3.exceptions.LocationParseError`` + which inherits from ``ValueError``. + + + 1.2 (2012-01-29) + ++++++++++++++++ + + * Added Python 3 support (tested on 3.2.2) + + * Dropped Python 2.5 support (tested on 2.6.7, 2.7.2) + + * Use ``select.poll`` instead of ``select.select`` for platforms that support + it. + + * Use ``Queue.LifoQueue`` instead of ``Queue.Queue`` for more aggressive + connection reusing. Configurable by overriding ``ConnectionPool.QueueCls``. + + * Fixed ``ImportError`` during install when ``ssl`` module is not available. + (Issue #41) + + * Fixed ``PoolManager`` redirects between schemes (such as HTTP -> HTTPS) not + completing properly. (Issue #28, uncovered by Issue #10 in v1.1) + + * Ported ``dummyserver`` to use ``tornado`` instead of ``webob`` + + ``eventlet``. Removed extraneous unsupported dummyserver testing backends. + Added socket-level tests. + + * More tests. Achievement Unlocked: 99% Coverage. + + + 1.1 (2012-01-07) + ++++++++++++++++ + + * Refactored ``dummyserver`` to its own root namespace module (used for + testing). + + * Added hostname verification for ``VerifiedHTTPSConnection`` by vendoring in + Py32's ``ssl_match_hostname``. (Issue #25) + + * Fixed cross-host HTTP redirects when using ``PoolManager``. (Issue #10) + + * Fixed ``decode_content`` being ignored when set through ``urlopen``. (Issue + #27) + + * Fixed timeout-related bugs. (Issues #17, #23) + + + 1.0.2 (2011-11-04) + ++++++++++++++++++ + + * Fixed typo in ``VerifiedHTTPSConnection`` which would only present as a bug if + you're using the object manually. (Thanks pyos) + + * Made RecentlyUsedContainer (and consequently PoolManager) more thread-safe by + wrapping the access log in a mutex. (Thanks @christer) + + * Made RecentlyUsedContainer more dict-like (corrected ``__delitem__`` and + ``__getitem__`` behaviour), with tests. Shouldn't affect core urllib3 code. 
+ + + 1.0.1 (2011-10-10) + ++++++++++++++++++ + + * Fixed a bug where the same connection would get returned into the pool twice, + causing extraneous "HttpConnectionPool is full" log warnings. + + + 1.0 (2011-10-08) + ++++++++++++++++ + + * Added ``PoolManager`` with LRU expiration of connections (tested and + documented). + * Added ``ProxyManager`` (needs tests, docs, and confirmation that it works + with HTTPS proxies). + * Added optional partial-read support for responses when + ``preload_content=False``. You can now make requests and just read the headers + without loading the content. + * Made response decoding optional (default on, same as before). + * Added optional explicit boundary string for ``encode_multipart_formdata``. + * Convenience request methods are now inherited from ``RequestMethods``. Old + helpers like ``get_url`` and ``post_url`` should be abandoned in favour of + the new ``request(method, url, ...)``. + * Refactored code to be even more decoupled, reusable, and extendable. + * License header added to ``.py`` files. + * Embiggened the documentation: Lots of Sphinx-friendly docstrings in the code + and docs in ``docs/`` and on urllib3.readthedocs.org. + * Embettered all the things! + * Started writing this file. + + + 0.4.1 (2011-07-17) + ++++++++++++++++++ + + * Minor bug fixes, code cleanup. + + + 0.4 (2011-03-01) + ++++++++++++++++ + + * Better unicode support. + * Added ``VerifiedHTTPSConnection``. + * Added ``NTLMConnectionPool`` in contrib. + * Minor improvements. + + + 0.3.1 (2010-07-13) + ++++++++++++++++++ + + * Added ``assert_host_name`` optional parameter. Now compatible with proxies. + + + 0.3 (2009-12-10) + ++++++++++++++++ + + * Added HTTPS support. + * Minor bug fixes. + * Refactored, broken backwards compatibility with 0.2. + * API to be treated as stable from this version forward. + + + 0.2 (2008-11-17) + ++++++++++++++++ + + * Added unit tests. + * Bug fixes. + + + 0.1 (2008-11-16) + ++++++++++++++++ + + * First release. 
+ +Keywords: urllib httplib threadsafe filepost http https ssl pooling +Platform: UNKNOWN +Classifier: Environment :: Web Environment +Classifier: Intended Audience :: Developers +Classifier: License :: OSI Approved :: MIT License +Classifier: Operating System :: OS Independent +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 2 +Classifier: Programming Language :: Python :: 3 +Classifier: Topic :: Internet :: WWW/HTTP +Classifier: Topic :: Software Development :: Libraries diff --git a/urllib3.egg-info/SOURCES.txt b/urllib3.egg-info/SOURCES.txt new file mode 100644 index 0000000..d7cbb3d --- /dev/null +++ b/urllib3.egg-info/SOURCES.txt @@ -0,0 +1,36 @@ +CHANGES.rst +CONTRIBUTORS.txt +LICENSE.txt +MANIFEST.in +README.rst +setup.cfg +setup.py +test-requirements.txt +dummyserver/__init__.py +dummyserver/handlers.py +dummyserver/server.py +dummyserver/testcase.py +test/__init__.py +test/benchmark.py +test/test_collections.py +test/test_connectionpool.py +test/test_poolmanager.py +test/test_response.py +urllib3/__init__.py +urllib3/_collections.py +urllib3/connectionpool.py +urllib3/exceptions.py +urllib3/filepost.py +urllib3/poolmanager.py +urllib3/request.py +urllib3/response.py +urllib3.egg-info/PKG-INFO +urllib3.egg-info/SOURCES.txt +urllib3.egg-info/dependency_links.txt +urllib3.egg-info/top_level.txt +urllib3/contrib/__init__.py +urllib3/contrib/ntlmpool.py +urllib3/packages/__init__.py +urllib3/packages/six.py +urllib3/packages/mimetools_choose_boundary/__init__.py +urllib3/packages/ssl_match_hostname/__init__.py \ No newline at end of file diff --git a/urllib3.egg-info/dependency_links.txt b/urllib3.egg-info/dependency_links.txt new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/urllib3.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/urllib3.egg-info/top_level.txt b/urllib3.egg-info/top_level.txt new file mode 100644 index 0000000..93675d9 --- /dev/null +++ b/urllib3.egg-info/top_level.txt @@ -0,0 
+1,2 @@ +urllib3 +dummyserver diff --git a/urllib3/__init__.py b/urllib3/__init__.py new file mode 100644 index 0000000..2e9c663 --- /dev/null +++ b/urllib3/__init__.py @@ -0,0 +1,48 @@ +# urllib3/__init__.py +# Copyright 2008-2012 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# +# This module is part of urllib3 and is released under +# the MIT License: http://www.opensource.org/licenses/mit-license.php + +""" +urllib3 - Thread-safe connection pooling and re-using. +""" + +__author__ = 'Andrey Petrov (andrey.petrov@shazow.net)' +__license__ = 'MIT' +__version__ = '1.2.2' + + +from .connectionpool import ( + HTTPConnectionPool, + HTTPSConnectionPool, + connection_from_url, + get_host, + make_headers) + + +from .exceptions import ( + HTTPError, + MaxRetryError, + SSLError, + TimeoutError) + +from .poolmanager import PoolManager, ProxyManager, proxy_from_url +from .response import HTTPResponse +from .filepost import encode_multipart_formdata + + +# Set default logging handler to avoid "No handler found" warnings. +import logging +try: + from logging import NullHandler +except ImportError: + class NullHandler(logging.Handler): + def emit(self, record): + pass + +logging.getLogger(__name__).addHandler(NullHandler()) + +# ... Clean up. 
+del logging +del NullHandler diff --git a/urllib3/_collections.py b/urllib3/_collections.py new file mode 100644 index 0000000..3cef081 --- /dev/null +++ b/urllib3/_collections.py @@ -0,0 +1,131 @@ +# urllib3/_collections.py +# Copyright 2008-2012 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# +# This module is part of urllib3 and is released under +# the MIT License: http://www.opensource.org/licenses/mit-license.php + +from collections import deque + +from threading import RLock + +__all__ = ['RecentlyUsedContainer'] + + +class AccessEntry(object): + __slots__ = ('key', 'is_valid') + + def __init__(self, key, is_valid=True): + self.key = key + self.is_valid = is_valid + + +class RecentlyUsedContainer(dict): + """ + Provides a dict-like that maintains up to ``maxsize`` keys while throwing + away the least-recently-used keys beyond ``maxsize``. + """ + + # If len(self.access_log) exceeds self._maxsize * CLEANUP_FACTOR, then we + # will attempt to cleanup the invalidated entries in the access_log + # datastructure during the next 'get' operation. + CLEANUP_FACTOR = 10 + + def __init__(self, maxsize=10): + self._maxsize = maxsize + + self._container = {} + + # We use a deque to to store our keys ordered by the last access. + self.access_log = deque() + self.access_log_lock = RLock() + + # We look up the access log entry by the key to invalidate it so we can + # insert a new authorative entry at the head without having to dig and + # find the old entry for removal immediately. + self.access_lookup = {} + + # Trigger a heap cleanup when we get past this size + self.access_log_limit = maxsize * self.CLEANUP_FACTOR + + def _invalidate_entry(self, key): + "If exists: Invalidate old entry and return it." + old_entry = self.access_lookup.get(key) + if old_entry: + old_entry.is_valid = False + + return old_entry + + def _push_entry(self, key): + "Push entry onto our access log, invalidate the old entry if exists." 
+ self._invalidate_entry(key) + + new_entry = AccessEntry(key) + self.access_lookup[key] = new_entry + + self.access_log_lock.acquire() + self.access_log.appendleft(new_entry) + self.access_log_lock.release() + + def _prune_entries(self, num): + "Pop entries from our access log until we popped ``num`` valid ones." + while num > 0: + self.access_log_lock.acquire() + p = self.access_log.pop() + self.access_log_lock.release() + + if not p.is_valid: + continue # Invalidated entry, skip + + dict.pop(self, p.key, None) + self.access_lookup.pop(p.key, None) + num -= 1 + + def _prune_invalidated_entries(self): + "Rebuild our access_log without the invalidated entries." + self.access_log_lock.acquire() + self.access_log = deque(e for e in self.access_log if e.is_valid) + self.access_log_lock.release() + + def _get_ordered_access_keys(self): + "Return ordered access keys for inspection. Used for testing." + self.access_log_lock.acquire() + r = [e.key for e in self.access_log if e.is_valid] + self.access_log_lock.release() + + return r + + def __getitem__(self, key): + item = dict.get(self, key) + + if not item: + raise KeyError(key) + + # Insert new entry with new high priority, also implicitly invalidates + # the old entry. + self._push_entry(key) + + if len(self.access_log) > self.access_log_limit: + # Heap is getting too big, try to clean up any tailing invalidated + # entries. 
+ self._prune_invalidated_entries() + + return item + + def __setitem__(self, key, item): + # Add item to our container and access log + dict.__setitem__(self, key, item) + self._push_entry(key) + + # Discard invalid and excess entries + self._prune_entries(len(self) - self._maxsize) + + def __delitem__(self, key): + self._invalidate_entry(key) + self.access_lookup.pop(key, None) + dict.__delitem__(self, key) + + def get(self, key, default=None): + try: + return self[key] + except KeyError: + return default diff --git a/urllib3/connectionpool.py b/urllib3/connectionpool.py new file mode 100644 index 0000000..39e652e --- /dev/null +++ b/urllib3/connectionpool.py @@ -0,0 +1,629 @@ +# urllib3/connectionpool.py +# Copyright 2008-2012 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# +# This module is part of urllib3 and is released under +# the MIT License: http://www.opensource.org/licenses/mit-license.php + +import logging +import socket + +from base64 import b64encode +from socket import error as SocketError, timeout as SocketTimeout + +try: + from select import poll, POLLIN +except ImportError: # Doesn't exist on OSX and other platforms + from select import select + poll = False + +try: # Python 3 + from http.client import HTTPConnection, HTTPException + from http.client import HTTP_PORT, HTTPS_PORT +except ImportError: + from httplib import HTTPConnection, HTTPException + from httplib import HTTP_PORT, HTTPS_PORT + +try: # Python 3 + from queue import LifoQueue, Empty, Full +except ImportError: + from Queue import LifoQueue, Empty, Full + + +try: # Compiled with SSL? 
+ HTTPSConnection = object + BaseSSLError = None + ssl = None + + try: # Python 3 + from http.client import HTTPSConnection + except ImportError: + from httplib import HTTPSConnection + + import ssl + BaseSSLError = ssl.SSLError + +except ImportError: + pass + + +from .packages.ssl_match_hostname import match_hostname, CertificateError +from .request import RequestMethods +from .response import HTTPResponse +from .exceptions import ( + EmptyPoolError, + HostChangedError, + LocationParseError, + MaxRetryError, + SSLError, + TimeoutError, +) + +from .packages.ssl_match_hostname import match_hostname, CertificateError +from .packages import six + +xrange = six.moves.xrange + +log = logging.getLogger(__name__) + +_Default = object() + +port_by_scheme = { + 'http': HTTP_PORT, + 'https': HTTPS_PORT, +} + +## Connection objects (extension of httplib) + +class VerifiedHTTPSConnection(HTTPSConnection): + """ + Based on httplib.HTTPSConnection but wraps the socket with + SSL certification. + """ + cert_reqs = None + ca_certs = None + + def set_cert(self, key_file=None, cert_file=None, + cert_reqs='CERT_NONE', ca_certs=None): + ssl_req_scheme = { + 'CERT_NONE': ssl.CERT_NONE, + 'CERT_OPTIONAL': ssl.CERT_OPTIONAL, + 'CERT_REQUIRED': ssl.CERT_REQUIRED + } + + self.key_file = key_file + self.cert_file = cert_file + self.cert_reqs = ssl_req_scheme.get(cert_reqs) or ssl.CERT_NONE + self.ca_certs = ca_certs + + def connect(self): + # Add certificate verification + sock = socket.create_connection((self.host, self.port), self.timeout) + + # Wrap socket using verification with the root certs in + # trusted_root_certs + self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, + cert_reqs=self.cert_reqs, + ca_certs=self.ca_certs) + if self.ca_certs: + match_hostname(self.sock.getpeercert(), self.host) + +## Pool objects + +class ConnectionPool(object): + """ + Base class for all connection pools, such as + :class:`.HTTPConnectionPool` and :class:`.HTTPSConnectionPool`. 
+ """ + + scheme = None + QueueCls = LifoQueue + + def __init__(self, host, port=None): + self.host = host + self.port = port + + def __str__(self): + return '%s(host=%r, port=%r)' % (type(self).__name__, + self.host, self.port) + + +class HTTPConnectionPool(ConnectionPool, RequestMethods): + """ + Thread-safe connection pool for one host. + + :param host: + Host used for this HTTP Connection (e.g. "localhost"), passed into + :class:`httplib.HTTPConnection`. + + :param port: + Port used for this HTTP Connection (None is equivalent to 80), passed + into :class:`httplib.HTTPConnection`. + + :param strict: + Causes BadStatusLine to be raised if the status line can't be parsed + as a valid HTTP/1.0 or 1.1 status line, passed into + :class:`httplib.HTTPConnection`. + + :param timeout: + Socket timeout for each individual connection, can be a float. None + disables timeout. + + :param maxsize: + Number of connections to save that can be reused. More than 1 is useful + in multithreaded situations. If ``block`` is set to false, more + connections will be created but they will not be saved once they've + been used. + + :param block: + If set to True, no more than ``maxsize`` connections will be used at + a time. When no free connections are available, the call will block + until a connection has been released. This is a useful side effect for + particular multithreaded situations where one does not want to use more + than maxsize connections per host to prevent flooding. + + :param headers: + Headers to include with all requests, unless other headers are given + explicitly. 
+ """ + + scheme = 'http' + + def __init__(self, host, port=None, strict=False, timeout=None, maxsize=1, + block=False, headers=None): + super(HTTPConnectionPool, self).__init__(host, port) + + self.strict = strict + self.timeout = timeout + self.pool = self.QueueCls(maxsize) + self.block = block + self.headers = headers or {} + + # Fill the queue up so that doing get() on it will block properly + for _ in xrange(maxsize): + self.pool.put(None) + + # These are mostly for testing and debugging purposes. + self.num_connections = 0 + self.num_requests = 0 + + def _new_conn(self): + """ + Return a fresh :class:`httplib.HTTPConnection`. + """ + self.num_connections += 1 + log.info("Starting new HTTP connection (%d): %s" % + (self.num_connections, self.host)) + return HTTPConnection(host=self.host, port=self.port) + + def _get_conn(self, timeout=None): + """ + Get a connection. Will return a pooled connection if one is available. + + If no connections are available and :prop:`.block` is ``False``, then a + fresh connection is returned. + + :param timeout: + Seconds to wait before giving up and raising + :class:`urllib3.exceptions.EmptyPoolError` if the pool is empty and + :prop:`.block` is ``True``. + """ + conn = None + try: + conn = self.pool.get(block=self.block, timeout=timeout) + + # If this is a persistent connection, check if it got disconnected + if conn and conn.sock and is_connection_dropped(conn): + log.info("Resetting dropped connection: %s" % self.host) + conn.close() + + except Empty: + if self.block: + raise EmptyPoolError(self, + "Pool reached maximum size and no more " + "connections are allowed.") + pass # Oh well, we'll create a new connection then + + return conn or self._new_conn() + + def _put_conn(self, conn): + """ + Put a connection back into the pool. + + :param conn: + Connection object for the current host and port as returned by + :meth:`._new_conn` or :meth:`._get_conn`. 
+ + If the pool is already full, the connection is discarded because we + exceeded maxsize. If connections are discarded frequently, then maxsize + should be increased. + """ + try: + self.pool.put(conn, block=False) + except Full: + # This should never happen if self.block == True + log.warning("HttpConnectionPool is full, discarding connection: %s" + % self.host) + + def _make_request(self, conn, method, url, timeout=_Default, + **httplib_request_kw): + """ + Perform a request on a given httplib connection object taken from our + pool. + """ + self.num_requests += 1 + + if timeout is _Default: + timeout = self.timeout + + conn.timeout = timeout # This only does anything in Py26+ + + conn.request(method, url, **httplib_request_kw) + conn.sock.settimeout(timeout) + httplib_response = conn.getresponse() + + log.debug("\"%s %s %s\" %s %s" % + (method, url, + conn._http_vsn_str, # pylint: disable-msg=W0212 + httplib_response.status, httplib_response.length)) + + return httplib_response + + + def is_same_host(self, url): + """ + Check if the given ``url`` is a member of the same host as this + connection pool. + """ + # TODO: Add optional support for socket.gethostbyname checking. + scheme, host, port = get_host(url) + + if self.port and not port: + # Use explicit default port for comparison when none is given. + port = port_by_scheme.get(scheme) + + return (url.startswith('/') or + (scheme, host, port) == (self.scheme, self.host, self.port)) + + def urlopen(self, method, url, body=None, headers=None, retries=3, + redirect=True, assert_same_host=True, timeout=_Default, + pool_timeout=None, release_conn=None, **response_kw): + """ + Get a connection from the pool and perform an HTTP request. This is the + lowest level call for making a request, so you'll need to specify all + the raw details. + + .. note:: + + More commonly, it's appropriate to use a convenience method provided + by :class:`.RequestMethods`, such as :meth:`.request`. + + .. 
note:: + + `release_conn` will only behave as expected if + `preload_content=False` because we want to make + `preload_content=False` the default behaviour someday soon without + breaking backwards compatibility. + + :param method: + HTTP request method (such as GET, POST, PUT, etc.) + + :param body: + Data to send in the request body (useful for creating + POST requests, see HTTPConnectionPool.post_url for + more convenience). + + :param headers: + Dictionary of custom headers to send, such as User-Agent, + If-None-Match, etc. If None, pool headers are used. If provided, + these headers completely replace any pool-specific headers. + + :param retries: + Number of retries to allow before raising a MaxRetryError exception. + + :param redirect: + Automatically handle redirects (status codes 301, 302, 303, 307), + each redirect counts as a retry. + + :param assert_same_host: + If ``True``, will make sure that the host of the pool requests is + consistent else will raise HostChangedError. When False, you can + use the pool on an HTTP proxy and request foreign hosts. + + :param timeout: + If specified, overrides the default timeout for this one request. + + :param pool_timeout: + If set and the pool is set to block=True, then this method will + block for ``pool_timeout`` seconds and raise EmptyPoolError if no + connection is available within the time period. + + :param release_conn: + If False, then the urlopen call will not release the connection + back into the pool once a response is received (but will release if + you read the entire contents of the response such as when + `preload_content=True`). This is useful if you're not preloading + the response's content immediately. You will need to call + ``r.release_conn()`` on the response ``r`` to return the connection + back into the pool. If None, it takes the value of + ``response_kw.get('preload_content', True)``. 
+ + :param \**response_kw: + Additional parameters are passed to + :meth:`urllib3.response.HTTPResponse.from_httplib` + """ + if headers is None: + headers = self.headers + + if retries < 0: + raise MaxRetryError(self, url) + + if timeout is _Default: + timeout = self.timeout + + if release_conn is None: + release_conn = response_kw.get('preload_content', True) + + # Check host + if assert_same_host and not self.is_same_host(url): + host = "%s://%s" % (self.scheme, self.host) + if self.port: + host = "%s:%d" % (host, self.port) + + raise HostChangedError(self, url, retries - 1) + + conn = None + + try: + # Request a connection from the queue + # (Could raise SocketError: Bad file descriptor) + conn = self._get_conn(timeout=pool_timeout) + + # Make the request on the httplib connection object + httplib_response = self._make_request(conn, method, url, + timeout=timeout, + body=body, headers=headers) + + # If we're going to release the connection in ``finally:``, then + # the request doesn't need to know about the connection. Otherwise + # it will also try to release it and we'll have a double-release + # mess. + response_conn = not release_conn and conn + + # Import httplib's response into our own wrapper object + response = HTTPResponse.from_httplib(httplib_response, + pool=self, + connection=response_conn, + **response_kw) + + # else: + # The connection will be put back into the pool when + # ``response.release_conn()`` is called (implicitly by + # ``response.read()``) + + except Empty as e: + # Timed out by queue + raise TimeoutError(self, "Request timed out. (pool_timeout=%s)" % + pool_timeout) + + except SocketTimeout as e: + # Timed out by socket + raise TimeoutError(self, "Request timed out. (timeout=%s)" % + timeout) + + except BaseSSLError as e: + # SSL certificate error + raise SSLError(e) + + except CertificateError as e: + # Name mismatch + raise SSLError(e) + + except (HTTPException, SocketError) as e: + # Connection broken, discard. 
It will be replaced next _get_conn(). + conn = None + # This is necessary so we can access e below + err = e + + finally: + if conn and release_conn: + # Put the connection back to be reused + self._put_conn(conn) + + if not conn: + log.warn("Retrying (%d attempts remain) after connection " + "broken by '%r': %s" % (retries, err, url)) + return self.urlopen(method, url, body, headers, retries - 1, + redirect, assert_same_host) # Try again + + # Handle redirect? + redirect_location = redirect and response.get_redirect_location() + if redirect_location: + log.info("Redirecting %s -> %s" % (url, redirect_location)) + return self.urlopen(method, redirect_location, body, headers, + retries - 1, redirect, assert_same_host) + + return response + + +class HTTPSConnectionPool(HTTPConnectionPool): + """ + Same as :class:`.HTTPConnectionPool`, but HTTPS. + + When Python is compiled with the :mod:`ssl` module, then + :class:`.VerifiedHTTPSConnection` is used, which *can* verify certificates, + instead of :class:httplib.HTTPSConnection`. + + The ``key_file``, ``cert_file``, ``cert_reqs``, and ``ca_certs`` parameters + are only used if :mod:`ssl` is available and are fed into + :meth:`ssl.wrap_socket` to upgrade the connection socket into an SSL socket. + """ + + scheme = 'https' + + def __init__(self, host, port=None, + strict=False, timeout=None, maxsize=1, + block=False, headers=None, + key_file=None, cert_file=None, + cert_reqs='CERT_NONE', ca_certs=None): + + super(HTTPSConnectionPool, self).__init__(host, port, + strict, timeout, maxsize, + block, headers) + self.key_file = key_file + self.cert_file = cert_file + self.cert_reqs = cert_reqs + self.ca_certs = ca_certs + + def _new_conn(self): + """ + Return a fresh :class:`httplib.HTTPSConnection`. 
+ """ + self.num_connections += 1 + log.info("Starting new HTTPS connection (%d): %s" + % (self.num_connections, self.host)) + + if not ssl: # Platform-specific: Python compiled without +ssl + if not HTTPSConnection or HTTPSConnection is object: + raise SSLError("Can't connect to HTTPS URL because the SSL " + "module is not available.") + + return HTTPSConnection(host=self.host, port=self.port) + + connection = VerifiedHTTPSConnection(host=self.host, port=self.port) + connection.set_cert(key_file=self.key_file, cert_file=self.cert_file, + cert_reqs=self.cert_reqs, ca_certs=self.ca_certs) + return connection + + +## Helpers + +def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, + basic_auth=None): + """ + Shortcuts for generating request headers. + + :param keep_alive: + If ``True``, adds 'connection: keep-alive' header. + + :param accept_encoding: + Can be a boolean, list, or string. + ``True`` translates to 'gzip,deflate'. + List will get joined by comma. + String will be used as provided. + + :param user_agent: + String representing the user-agent you want, such as + "python-urllib3/0.6" + + :param basic_auth: + Colon-separated username:password string for 'authorization: basic ...' + auth header. 
+ + Example: :: + + >>> make_headers(keep_alive=True, user_agent="Batman/1.0") + {'connection': 'keep-alive', 'user-agent': 'Batman/1.0'} + >>> make_headers(accept_encoding=True) + {'accept-encoding': 'gzip,deflate'} + """ + headers = {} + if accept_encoding: + if isinstance(accept_encoding, str): + pass + elif isinstance(accept_encoding, list): + accept_encoding = ','.join(accept_encoding) + else: + accept_encoding = 'gzip,deflate' + headers['accept-encoding'] = accept_encoding + + if user_agent: + headers['user-agent'] = user_agent + + if keep_alive: + headers['connection'] = 'keep-alive' + + if basic_auth: + headers['authorization'] = 'Basic ' + \ + b64encode(six.b(basic_auth)).decode('utf-8') + + return headers + + +def get_host(url): + """ + Given a url, return its scheme, host and port (None if it's not there). + + For example: :: + + >>> get_host('http://google.com/mail/') + ('http', 'google.com', None) + >>> get_host('google.com:80') + ('http', 'google.com', 80) + """ + + # This code is actually similar to urlparse.urlsplit, but much + # simplified for our needs. + port = None + scheme = 'http' + + if '://' in url: + scheme, url = url.split('://', 1) + if '/' in url: + url, _path = url.split('/', 1) + if '@' in url: + _auth, url = url.split('@', 1) + if ':' in url: + url, port = url.split(':', 1) + + if not port.isdigit(): + raise LocationParseError("Failed to parse: %s") + + port = int(port) + + return scheme, url, port + + +def connection_from_url(url, **kw): + """ + Given a url, return an :class:`.ConnectionPool` instance of its host. + + This is a shortcut for not having to parse out the scheme, host, and port + of the url before creating an :class:`.ConnectionPool` instance. + + :param url: + Absolute URL string that must include the scheme. Port is optional. + + :param \**kw: + Passes additional parameters to the constructor of the appropriate + :class:`.ConnectionPool`. Useful for specifying things like + timeout, maxsize, headers, etc. 
+ + Example: :: + + >>> conn = connection_from_url('http://google.com/') + >>> r = conn.request('GET', '/') + """ + scheme, host, port = get_host(url) + if scheme == 'https': + return HTTPSConnectionPool(host, port=port, **kw) + else: + return HTTPConnectionPool(host, port=port, **kw) + + +def is_connection_dropped(conn): + """ + Returns True if the connection is dropped and should be closed. + + :param conn: + ``HTTPConnection`` object. + """ + if not poll: # Platform-specific + return select([conn.sock], [], [], 0.0)[0] + + # This version is better on platforms that support it. + p = poll() + p.register(conn.sock, POLLIN) + for (fno, ev) in p.poll(0.0): + if fno == conn.sock.fileno(): + # Either data is buffered (bad), or the connection is dropped. + return True diff --git a/urllib3/contrib/__init__.py b/urllib3/contrib/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/urllib3/contrib/ntlmpool.py b/urllib3/contrib/ntlmpool.py new file mode 100644 index 0000000..bb41fd1 --- /dev/null +++ b/urllib3/contrib/ntlmpool.py @@ -0,0 +1,120 @@ +# urllib3/contrib/ntlmpool.py +# Copyright 2008-2012 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# +# This module is part of urllib3 and is released under +# the MIT License: http://www.opensource.org/licenses/mit-license.php + +""" +NTLM authenticating pool, contributed by erikcederstran + +Issue #10, see: http://code.google.com/p/urllib3/issues/detail?id=10 +""" + +try: + from http.client import HTTPSConnection +except ImportError: + from httplib import HTTPSConnection +from logging import getLogger +from ntlm import ntlm + +from urllib3 import HTTPSConnectionPool + + +log = getLogger(__name__) + + +class NTLMConnectionPool(HTTPSConnectionPool): + """ + Implements an NTLM authentication version of an urllib3 connection pool + """ + + scheme = 'https' + + def __init__(self, user, pw, authurl, *args, **kwargs): + """ + authurl is a random URL on the server that is protected by NTLM. 
+ user is the Windows user, probably in the DOMAIN\username format. + pw is the password for the user. + """ + super(NTLMConnectionPool, self).__init__(*args, **kwargs) + self.authurl = authurl + self.rawuser = user + user_parts = user.split('\\', 1) + self.domain = user_parts[0].upper() + self.user = user_parts[1] + self.pw = pw + + def _new_conn(self): + # Performs the NTLM handshake that secures the connection. The socket + # must be kept open while requests are performed. + self.num_connections += 1 + log.debug('Starting NTLM HTTPS connection no. %d: https://%s%s' % + (self.num_connections, self.host, self.authurl)) + + headers = {} + headers['Connection'] = 'Keep-Alive' + req_header = 'Authorization' + resp_header = 'www-authenticate' + + conn = HTTPSConnection(host=self.host, port=self.port) + + # Send negotiation message + headers[req_header] = ( + 'NTLM %s' % ntlm.create_NTLM_NEGOTIATE_MESSAGE(self.rawuser)) + log.debug('Request headers: %s' % headers) + conn.request('GET', self.authurl, None, headers) + res = conn.getresponse() + reshdr = dict(res.getheaders()) + log.debug('Response status: %s %s' % (res.status, res.reason)) + log.debug('Response headers: %s' % reshdr) + log.debug('Response data: %s [...]' % res.read(100)) + + # Remove the reference to the socket, so that it can not be closed by + # the response object (we want to keep the socket open) + res.fp = None + + # Server should respond with a challenge message + auth_header_values = reshdr[resp_header].split(', ') + auth_header_value = None + for s in auth_header_values: + if s[:5] == 'NTLM ': + auth_header_value = s[5:] + if auth_header_value is None: + raise Exception('Unexpected %s response header: %s' % + (resp_header, reshdr[resp_header])) + + # Send authentication message + ServerChallenge, NegotiateFlags = \ + ntlm.parse_NTLM_CHALLENGE_MESSAGE(auth_header_value) + auth_msg = ntlm.create_NTLM_AUTHENTICATE_MESSAGE(ServerChallenge, + self.user, + self.domain, + self.pw, + NegotiateFlags) + 
headers[req_header] = 'NTLM %s' % auth_msg + log.debug('Request headers: %s' % headers) + conn.request('GET', self.authurl, None, headers) + res = conn.getresponse() + log.debug('Response status: %s %s' % (res.status, res.reason)) + log.debug('Response headers: %s' % dict(res.getheaders())) + log.debug('Response data: %s [...]' % res.read()[:100]) + if res.status != 200: + if res.status == 401: + raise Exception('Server rejected request: wrong ' + 'username or password') + raise Exception('Wrong server response: %s %s' % + (res.status, res.reason)) + + res.fp = None + log.debug('Connection established') + return conn + + def urlopen(self, method, url, body=None, headers=None, retries=3, + redirect=True, assert_same_host=True): + if headers is None: + headers = {} + headers['Connection'] = 'Keep-Alive' + return super(NTLMConnectionPool, self).urlopen(method, url, body, + headers, retries, + redirect, + assert_same_host) diff --git a/urllib3/exceptions.py b/urllib3/exceptions.py new file mode 100644 index 0000000..15c9699 --- /dev/null +++ b/urllib3/exceptions.py @@ -0,0 +1,67 @@ +# urllib3/exceptions.py +# Copyright 2008-2012 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# +# This module is part of urllib3 and is released under +# the MIT License: http://www.opensource.org/licenses/mit-license.php + + +## Base Exceptions + +class HTTPError(Exception): + "Base exception used by this module." + pass + + +class PoolError(HTTPError): + "Base exception for errors caused within a pool." + def __init__(self, pool, message): + self.pool = pool + HTTPError.__init__(self, "%s: %s" % (pool, message)) + + +class SSLError(HTTPError): + "Raised when SSL certificate fails in an HTTPS connection." + pass + + +## Leaf Exceptions + +class MaxRetryError(PoolError): + "Raised when the maximum number of retries is exceeded." 
+ + def __init__(self, pool, url): + message = "Max retries exceeded with url: %s" % url + PoolError.__init__(self, pool, message) + + self.url = url + + +class HostChangedError(PoolError): + "Raised when an existing pool gets a request for a foreign host." + + def __init__(self, pool, url, retries=3): + message = "Tried to open a foreign host with url: %s" % url + PoolError.__init__(self, pool, message) + + self.url = url + self.retries = retries + + +class TimeoutError(PoolError): + "Raised when a socket timeout occurs." + pass + + +class EmptyPoolError(PoolError): + "Raised when a pool runs out of connections and no more are allowed." + pass + + +class LocationParseError(ValueError, HTTPError): + "Raised when get_host or similar fails to parse the URL input." + + def __init__(self, location): + message = "Failed to parse: %s" % location + super(LocationParseError, self).__init__(self, message) + + self.location = location diff --git a/urllib3/filepost.py b/urllib3/filepost.py new file mode 100644 index 0000000..e1ec8af --- /dev/null +++ b/urllib3/filepost.py @@ -0,0 +1,74 @@ +# urllib3/filepost.py +# Copyright 2008-2012 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# +# This module is part of urllib3 and is released under +# the MIT License: http://www.opensource.org/licenses/mit-license.php + +import codecs +import mimetypes + +try: + from mimetools import choose_boundary +except ImportError: + from .packages.mimetools_choose_boundary import choose_boundary + +from io import BytesIO + +from .packages import six +from .packages.six import b + +writer = codecs.lookup('utf-8')[3] + + +def get_content_type(filename): + return mimetypes.guess_type(filename)[0] or 'application/octet-stream' + + +def encode_multipart_formdata(fields, boundary=None): + """ + Encode a dictionary of ``fields`` using the multipart/form-data mime format. + + :param fields: + Dictionary of fields. The key is treated as the field name, and the + value as the body of the form-data. 
If the value is a tuple of two + elements, then the first element is treated as the filename of the + form-data section. + + :param boundary: + If not specified, then a random boundary will be generated using + :func:`mimetools.choose_boundary`. + """ + body = BytesIO() + if boundary is None: + boundary = choose_boundary() + + for fieldname, value in six.iteritems(fields): + body.write(b('--%s\r\n' % (boundary))) + + if isinstance(value, tuple): + filename, data = value + writer(body).write('Content-Disposition: form-data; name="%s"; ' + 'filename="%s"\r\n' % (fieldname, filename)) + body.write(b('Content-Type: %s\r\n\r\n' % + (get_content_type(filename)))) + else: + data = value + writer(body).write('Content-Disposition: form-data; name="%s"\r\n' + % (fieldname)) + body.write(b'Content-Type: text/plain\r\n\r\n') + + if isinstance(data, int): + data = str(data) # Backwards compatibility + + if isinstance(data, six.text_type): + writer(body).write(data) + else: + body.write(data) + + body.write(b'\r\n') + + body.write(b('--%s--\r\n' % (boundary))) + + content_type = b('multipart/form-data; boundary=%s' % boundary) + + return body.getvalue(), content_type diff --git a/urllib3/packages/__init__.py b/urllib3/packages/__init__.py new file mode 100644 index 0000000..37e8351 --- /dev/null +++ b/urllib3/packages/__init__.py @@ -0,0 +1,4 @@ +from __future__ import absolute_import + +from . import ssl_match_hostname + diff --git a/urllib3/packages/mimetools_choose_boundary/__init__.py b/urllib3/packages/mimetools_choose_boundary/__init__.py new file mode 100644 index 0000000..a0109ab --- /dev/null +++ b/urllib3/packages/mimetools_choose_boundary/__init__.py @@ -0,0 +1,47 @@ +"""The function mimetools.choose_boundary() from Python 2.7, which seems to +have disappeared in Python 3 (although email.generator._make_boundary() might +work as a replacement?). + +Tweaked to use lock from threading rather than thread. 
+""" +import os +from threading import Lock +_counter_lock = Lock() + +_counter = 0 +def _get_next_counter(): + global _counter + with _counter_lock: + _counter += 1 + return _counter + +_prefix = None + +def choose_boundary(): + """Return a string usable as a multipart boundary. + + The string chosen is unique within a single program run, and + incorporates the user id (if available), process id (if available), + and current time. So it's very unlikely the returned string appears + in message text, but there's no guarantee. + + The boundary contains dots so you have to quote it in the header.""" + + global _prefix + import time + if _prefix is None: + import socket + try: + hostid = socket.gethostbyname(socket.gethostname()) + except socket.gaierror: + hostid = '127.0.0.1' + try: + uid = repr(os.getuid()) + except AttributeError: + uid = '1' + try: + pid = repr(os.getpid()) + except AttributeError: + pid = '1' + _prefix = hostid + '.' + uid + '.' + pid + return "%s.%.3f.%d" % (_prefix, time.time(), _get_next_counter()) diff --git a/urllib3/packages/six.py b/urllib3/packages/six.py new file mode 100644 index 0000000..a64f6fb --- /dev/null +++ b/urllib3/packages/six.py @@ -0,0 +1,372 @@ +"""Utilities for writing code that runs on Python 2 and 3""" + +#Copyright (c) 2010-2011 Benjamin Peterson + +#Permission is hereby granted, free of charge, to any person obtaining a copy of +#this software and associated documentation files (the "Software"), to deal in +#the Software without restriction, including without limitation the rights to +#use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +#the Software, and to permit persons to whom the Software is furnished to do so, +#subject to the following conditions: + +#The above copyright notice and this permission notice shall be included in all +#copies or substantial portions of the Software. 
+ +#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +#IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +#FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +#COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +#IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +#CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +import operator +import sys +import types + +__author__ = "Benjamin Peterson " +__version__ = "1.1.0" + + +# True if we are running on Python 3. +PY3 = sys.version_info[0] == 3 + +if PY3: + string_types = str, + integer_types = int, + class_types = type, + text_type = str + binary_type = bytes + + MAXSIZE = sys.maxsize +else: + string_types = basestring, + integer_types = (int, long) + class_types = (type, types.ClassType) + text_type = unicode + binary_type = str + + # It's possible to have sizeof(long) != sizeof(Py_ssize_t). + class X(object): + def __len__(self): + return 1 << 31 + try: + len(X()) + except OverflowError: + # 32-bit + MAXSIZE = int((1 << 31) - 1) + else: + # 64-bit + MAXSIZE = int((1 << 63) - 1) + del X + + +def _add_doc(func, doc): + """Add documentation to a function.""" + func.__doc__ = doc + + +def _import_module(name): + """Import module, returning the module after the last dot.""" + __import__(name) + return sys.modules[name] + + +class _LazyDescr(object): + + def __init__(self, name): + self.name = name + + def __get__(self, obj, tp): + result = self._resolve() + setattr(obj, self.name, result) + # This is a bit ugly, but it avoids running this again. 
+ delattr(tp, self.name) + return result + + +class MovedModule(_LazyDescr): + + def __init__(self, name, old, new=None): + super(MovedModule, self).__init__(name) + if PY3: + if new is None: + new = name + self.mod = new + else: + self.mod = old + + def _resolve(self): + return _import_module(self.mod) + + +class MovedAttribute(_LazyDescr): + + def __init__(self, name, old_mod, new_mod, old_attr=None, new_attr=None): + super(MovedAttribute, self).__init__(name) + if PY3: + if new_mod is None: + new_mod = name + self.mod = new_mod + if new_attr is None: + if old_attr is None: + new_attr = name + else: + new_attr = old_attr + self.attr = new_attr + else: + self.mod = old_mod + if old_attr is None: + old_attr = name + self.attr = old_attr + + def _resolve(self): + module = _import_module(self.mod) + return getattr(module, self.attr) + + + +class _MovedItems(types.ModuleType): + """Lazy loading of moved objects""" + + +_moved_attributes = [ + MovedAttribute("cStringIO", "cStringIO", "io", "StringIO"), + MovedAttribute("filter", "itertools", "builtins", "ifilter", "filter"), + MovedAttribute("map", "itertools", "builtins", "imap", "map"), + MovedAttribute("reload_module", "__builtin__", "imp", "reload"), + MovedAttribute("reduce", "__builtin__", "functools"), + MovedAttribute("StringIO", "StringIO", "io"), + MovedAttribute("xrange", "__builtin__", "builtins", "xrange", "range"), + MovedAttribute("zip", "itertools", "builtins", "izip", "zip"), + + MovedModule("builtins", "__builtin__"), + MovedModule("configparser", "ConfigParser"), + MovedModule("copyreg", "copy_reg"), + MovedModule("http_cookiejar", "cookielib", "http.cookiejar"), + MovedModule("http_cookies", "Cookie", "http.cookies"), + MovedModule("html_entities", "htmlentitydefs", "html.entities"), + MovedModule("html_parser", "HTMLParser", "html.parser"), + MovedModule("http_client", "httplib", "http.client"), + MovedModule("BaseHTTPServer", "BaseHTTPServer", "http.server"), + MovedModule("CGIHTTPServer", 
"CGIHTTPServer", "http.server"), + MovedModule("SimpleHTTPServer", "SimpleHTTPServer", "http.server"), + MovedModule("cPickle", "cPickle", "pickle"), + MovedModule("queue", "Queue"), + MovedModule("reprlib", "repr"), + MovedModule("socketserver", "SocketServer"), + MovedModule("tkinter", "Tkinter"), + MovedModule("tkinter_dialog", "Dialog", "tkinter.dialog"), + MovedModule("tkinter_filedialog", "FileDialog", "tkinter.filedialog"), + MovedModule("tkinter_scrolledtext", "ScrolledText", "tkinter.scrolledtext"), + MovedModule("tkinter_simpledialog", "SimpleDialog", "tkinter.simpledialog"), + MovedModule("tkinter_tix", "Tix", "tkinter.tix"), + MovedModule("tkinter_constants", "Tkconstants", "tkinter.constants"), + MovedModule("tkinter_dnd", "Tkdnd", "tkinter.dnd"), + MovedModule("tkinter_colorchooser", "tkColorChooser", + "tkinter.colorchooser"), + MovedModule("tkinter_commondialog", "tkCommonDialog", + "tkinter.commondialog"), + MovedModule("tkinter_tkfiledialog", "tkFileDialog", "tkinter.filedialog"), + MovedModule("tkinter_font", "tkFont", "tkinter.font"), + MovedModule("tkinter_messagebox", "tkMessageBox", "tkinter.messagebox"), + MovedModule("tkinter_tksimpledialog", "tkSimpleDialog", + "tkinter.simpledialog"), + MovedModule("urllib_robotparser", "robotparser", "urllib.robotparser"), + MovedModule("winreg", "_winreg"), +] +for attr in _moved_attributes: + setattr(_MovedItems, attr.name, attr) +del attr + +moves = sys.modules["six.moves"] = _MovedItems("moves") + + +def add_move(move): + """Add an item to six.moves.""" + setattr(_MovedItems, move.name, move) + + +def remove_move(name): + """Remove item from six.moves.""" + try: + delattr(_MovedItems, name) + except AttributeError: + try: + del moves.__dict__[name] + except KeyError: + raise AttributeError("no such move, %r" % (name,)) + + +if PY3: + _meth_func = "__func__" + _meth_self = "__self__" + + _func_code = "__code__" + _func_defaults = "__defaults__" + + _iterkeys = "keys" + _itervalues = "values" + 
_iteritems = "items" +else: + _meth_func = "im_func" + _meth_self = "im_self" + + _func_code = "func_code" + _func_defaults = "func_defaults" + + _iterkeys = "iterkeys" + _itervalues = "itervalues" + _iteritems = "iteritems" + + +if PY3: + def get_unbound_function(unbound): + return unbound + + + advance_iterator = next + + def callable(obj): + return any("__call__" in klass.__dict__ for klass in type(obj).__mro__) +else: + def get_unbound_function(unbound): + return unbound.im_func + + + def advance_iterator(it): + return it.next() + + callable = callable +_add_doc(get_unbound_function, + """Get the function out of a possibly unbound function""") + + +get_method_function = operator.attrgetter(_meth_func) +get_method_self = operator.attrgetter(_meth_self) +get_function_code = operator.attrgetter(_func_code) +get_function_defaults = operator.attrgetter(_func_defaults) + + +def iterkeys(d): + """Return an iterator over the keys of a dictionary.""" + return getattr(d, _iterkeys)() + +def itervalues(d): + """Return an iterator over the values of a dictionary.""" + return getattr(d, _itervalues)() + +def iteritems(d): + """Return an iterator over the (key, value) pairs of a dictionary.""" + return getattr(d, _iteritems)() + + +if PY3: + def b(s): + return s.encode("latin-1") + def u(s): + return s + if sys.version_info[1] <= 1: + def int2byte(i): + return bytes((i,)) + else: + # This is about 2x faster than the implementation above on 3.2+ + int2byte = operator.methodcaller("to_bytes", 1, "big") + import io + StringIO = io.StringIO + BytesIO = io.BytesIO +else: + def b(s): + return s + def u(s): + return unicode(s, "unicode_escape") + int2byte = chr + import StringIO + StringIO = BytesIO = StringIO.StringIO +_add_doc(b, """Byte literal""") +_add_doc(u, """Text literal""") + + +if PY3: + import builtins + exec_ = getattr(builtins, "exec") + + + def reraise(tp, value, tb=None): + if value.__traceback__ is not tb: + raise value.with_traceback(tb) + raise value + + + print_ 
= getattr(builtins, "print") + del builtins + +else: + def exec_(code, globs=None, locs=None): + """Execute code in a namespace.""" + if globs is None: + frame = sys._getframe(1) + globs = frame.f_globals + if locs is None: + locs = frame.f_locals + del frame + elif locs is None: + locs = globs + exec("""exec code in globs, locs""") + + + exec_("""def reraise(tp, value, tb=None): + raise tp, value, tb +""") + + + def print_(*args, **kwargs): + """The new-style print function.""" + fp = kwargs.pop("file", sys.stdout) + if fp is None: + return + def write(data): + if not isinstance(data, basestring): + data = str(data) + fp.write(data) + want_unicode = False + sep = kwargs.pop("sep", None) + if sep is not None: + if isinstance(sep, unicode): + want_unicode = True + elif not isinstance(sep, str): + raise TypeError("sep must be None or a string") + end = kwargs.pop("end", None) + if end is not None: + if isinstance(end, unicode): + want_unicode = True + elif not isinstance(end, str): + raise TypeError("end must be None or a string") + if kwargs: + raise TypeError("invalid keyword arguments to print()") + if not want_unicode: + for arg in args: + if isinstance(arg, unicode): + want_unicode = True + break + if want_unicode: + newline = unicode("\n") + space = unicode(" ") + else: + newline = "\n" + space = " " + if sep is None: + sep = space + if end is None: + end = newline + for i, arg in enumerate(args): + if i: + write(sep) + write(arg) + write(end) + +_add_doc(reraise, """Reraise an exception.""") + + +def with_metaclass(meta, base=object): + """Create a base class with a metaclass.""" + return meta("NewBase", (base,), {}) diff --git a/urllib3/packages/ssl_match_hostname/__init__.py b/urllib3/packages/ssl_match_hostname/__init__.py new file mode 100644 index 0000000..9560b04 --- /dev/null +++ b/urllib3/packages/ssl_match_hostname/__init__.py @@ -0,0 +1,61 @@ +"""The match_hostname() function from Python 3.2, essential when using SSL.""" + +import re + +__version__ = 
'3.2.2' + +class CertificateError(ValueError): + pass + +def _dnsname_to_pat(dn): + pats = [] + for frag in dn.split(r'.'): + if frag == '*': + # When '*' is a fragment by itself, it matches a non-empty dotless + # fragment. + pats.append('[^.]+') + else: + # Otherwise, '*' matches any dotless fragment. + frag = re.escape(frag) + pats.append(frag.replace(r'\*', '[^.]*')) + return re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE) + +def match_hostname(cert, hostname): + """Verify that *cert* (in decoded format as returned by + SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 rules + are mostly followed, but IP addresses are not accepted for *hostname*. + + CertificateError is raised on failure. On success, the function + returns nothing. + """ + if not cert: + raise ValueError("empty or no certificate") + dnsnames = [] + san = cert.get('subjectAltName', ()) + for key, value in san: + if key == 'DNS': + if _dnsname_to_pat(value).match(hostname): + return + dnsnames.append(value) + if not dnsnames: + # The subject is only checked when there is no dNSName entry + # in subjectAltName + for sub in cert.get('subject', ()): + for key, value in sub: + # XXX according to RFC 2818, the most specific Common Name + # must be used. 
+ if key == 'commonName': + if _dnsname_to_pat(value).match(hostname): + return + dnsnames.append(value) + if len(dnsnames) > 1: + raise CertificateError("hostname %r " + "doesn't match either of %s" + % (hostname, ', '.join(map(repr, dnsnames)))) + elif len(dnsnames) == 1: + raise CertificateError("hostname %r " + "doesn't match %r" + % (hostname, dnsnames[0])) + else: + raise CertificateError("no appropriate commonName or " + "subjectAltName fields were found") diff --git a/urllib3/poolmanager.py b/urllib3/poolmanager.py new file mode 100644 index 0000000..d42f35b --- /dev/null +++ b/urllib3/poolmanager.py @@ -0,0 +1,138 @@ +# urllib3/poolmanager.py +# Copyright 2008-2012 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# +# This module is part of urllib3 and is released under +# the MIT License: http://www.opensource.org/licenses/mit-license.php + +import logging + +from ._collections import RecentlyUsedContainer +from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool +from .connectionpool import get_host, connection_from_url, port_by_scheme +from .exceptions import HostChangedError +from .request import RequestMethods + + +__all__ = ['PoolManager', 'ProxyManager', 'proxy_from_url'] + + +pool_classes_by_scheme = { + 'http': HTTPConnectionPool, + 'https': HTTPSConnectionPool, +} + +log = logging.getLogger(__name__) + + +class PoolManager(RequestMethods): + """ + Allows for arbitrary requests while transparently keeping track of + necessary connection pools for you. + + :param num_pools: + Number of connection pools to cache before discarding the least recently + used pool. + + :param \**connection_pool_kw: + Additional parameters are used to create fresh + :class:`urllib3.connectionpool.ConnectionPool` instances. 
+ + Example: :: + + >>> manager = PoolManager() + >>> r = manager.urlopen("http://google.com/") + >>> r = manager.urlopen("http://google.com/mail") + >>> r = manager.urlopen("http://yahoo.com/") + >>> len(r.pools) + 2 + + """ + + # TODO: Make sure there are no memory leaks here. + + def __init__(self, num_pools=10, **connection_pool_kw): + self.connection_pool_kw = connection_pool_kw + self.pools = RecentlyUsedContainer(num_pools) + + def connection_from_host(self, host, port=80, scheme='http'): + """ + Get a :class:`ConnectionPool` based on the host, port, and scheme. + + Note that an appropriate ``port`` value is required here to normalize + connection pools in our container most effectively. + """ + pool_key = (scheme, host, port) + + # If the scheme, host, or port doesn't match existing open connections, + # open a new ConnectionPool. + pool = self.pools.get(pool_key) + if pool: + return pool + + # Make a fresh ConnectionPool of the desired type + pool_cls = pool_classes_by_scheme[scheme] + pool = pool_cls(host, port, **self.connection_pool_kw) + + self.pools[pool_key] = pool + + return pool + + def connection_from_url(self, url): + """ + Similar to :func:`urllib3.connectionpool.connection_from_url` but + doesn't pass any additional parameters to the + :class:`urllib3.connectionpool.ConnectionPool` constructor. + + Additional parameters are taken from the :class:`.PoolManager` + constructor. + """ + scheme, host, port = get_host(url) + + port = port or port_by_scheme.get(scheme, 80) + + return self.connection_from_host(host, port=port, scheme=scheme) + + def urlopen(self, method, url, **kw): + """ + Same as :meth:`urllib3.connectionpool.HTTPConnectionPool.urlopen`. + + ``url`` must be absolute, such that an appropriate + :class:`urllib3.connectionpool.ConnectionPool` can be chosen for it. 
+ """ + conn = self.connection_from_url(url) + try: + return conn.urlopen(method, url, **kw) + + except HostChangedError as e: + kw['retries'] = e.retries # Persist retries countdown + return self.urlopen(method, e.url, **kw) + + +class ProxyManager(RequestMethods): + """ + Given a ConnectionPool to a proxy, the ProxyManager's ``urlopen`` method + will make requests to any url through the defined proxy. + """ + + def __init__(self, proxy_pool): + self.proxy_pool = proxy_pool + + def _set_proxy_headers(self, headers=None): + headers = headers or {} + + # Same headers are curl passes for --proxy1.0 + headers['Accept'] = '*/*' + headers['Proxy-Connection'] = 'Keep-Alive' + + return headers + + def urlopen(self, method, url, **kw): + "Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute." + kw['assert_same_host'] = False + kw['headers'] = self._set_proxy_headers(kw.get('headers')) + return self.proxy_pool.urlopen(method, url, **kw) + + +def proxy_from_url(url, **pool_kw): + proxy_pool = connection_from_url(url, **pool_kw) + return ProxyManager(proxy_pool) diff --git a/urllib3/request.py b/urllib3/request.py new file mode 100644 index 0000000..5ea26a0 --- /dev/null +++ b/urllib3/request.py @@ -0,0 +1,147 @@ +# urllib3/request.py +# Copyright 2008-2012 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# +# This module is part of urllib3 and is released under +# the MIT License: http://www.opensource.org/licenses/mit-license.php + +try: + from urllib.parse import urlencode +except ImportError: + from urllib import urlencode + +from .filepost import encode_multipart_formdata + + +__all__ = ['RequestMethods'] + + +class RequestMethods(object): + """ + Convenience mixin for classes who implement a :meth:`urlopen` method, such + as :class:`~urllib3.connectionpool.HTTPConnectionPool` and + :class:`~urllib3.poolmanager.PoolManager`. + + Provides behavior for making common types of HTTP request methods and + decides which type of request field encoding to use. 
+ + Specifically, + + :meth:`.request_encode_url` is for sending requests whose fields are encoded + in the URL (such as GET, HEAD, DELETE). + + :meth:`.request_encode_body` is for sending requests whose fields are + encoded in the *body* of the request using multipart or www-orm-urlencoded + (such as for POST, PUT, PATCH). + + :meth:`.request` is for making any kind of request, it will look up the + appropriate encoding format and use one of the above two methods to make + the request. + """ + + _encode_url_methods = set(['DELETE', 'GET', 'HEAD', 'OPTIONS']) + + _encode_body_methods = set(['PATCH', 'POST', 'PUT', 'TRACE']) + + def urlopen(self, method, url, body=None, headers=None, + encode_multipart=True, multipart_boundary=None, + **kw): + raise NotImplemented("Classes extending RequestMethods must implement " + "their own ``urlopen`` method.") + + def request(self, method, url, fields=None, headers=None, **urlopen_kw): + """ + Make a request using :meth:`urlopen` with the appropriate encoding of + ``fields`` based on the ``method`` used. + + This is a convenience method that requires the least amount of manual + effort. It can be used in most situations, while still having the option + to drop down to more specific methods when necessary, such as + :meth:`request_encode_url`, :meth:`request_encode_body`, + or even the lowest level :meth:`urlopen`. + """ + method = method.upper() + + if method in self._encode_url_methods: + return self.request_encode_url(method, url, fields=fields, + headers=headers, + **urlopen_kw) + else: + return self.request_encode_body(method, url, fields=fields, + headers=headers, + **urlopen_kw) + + def request_encode_url(self, method, url, fields=None, **urlopen_kw): + """ + Make a request using :meth:`urlopen` with the ``fields`` encoded in + the url. This is useful for request methods like GET, HEAD, DELETE, etc. + """ + if fields: + url += '?' 
+ urlencode(fields) + return self.urlopen(method, url, **urlopen_kw) + + def request_encode_body(self, method, url, fields=None, headers=None, + encode_multipart=True, multipart_boundary=None, + **urlopen_kw): + """ + Make a request using :meth:`urlopen` with the ``fields`` encoded in + the body. This is useful for request methods like POST, PUT, PATCH, etc. + + When ``encode_multipart=True`` (default), then + :meth:`urllib3.filepost.encode_multipart_formdata` is used to encode the + payload with the appropriate content type. Otherwise + :meth:`urllib.urlencode` is used with the + 'application/x-www-form-urlencoded' content type. + + Multipart encoding must be used when posting files, and it's reasonably + safe to use it in other times too. However, it may break request signing, + such as with OAuth. + + Supports an optional ``fields`` parameter of key/value strings AND + key/filetuple. A filetuple is a (filename, data) tuple. For example: :: + + fields = { + 'foo': 'bar', + 'fakefile': ('foofile.txt', 'contents of foofile'), + 'realfile': ('barfile.txt', open('realfile').read()), + 'nonamefile': ('contents of nonamefile field'), + } + + When uploading a file, providing a filename (the first parameter of the + tuple) is optional but recommended to best mimick behavior of browsers. + + Note that if ``headers`` are supplied, the 'Content-Type' header will be + overwritten because it depends on the dynamic random boundary string + which is used to compose the body of the request. The random boundary + string can be explicitly set with the ``multipart_boundary`` parameter. 
+ """ + if encode_multipart: + body, content_type = encode_multipart_formdata(fields or {}, + boundary=multipart_boundary) + else: + body, content_type = (urlencode(fields or {}), + 'application/x-www-form-urlencoded') + + headers = headers or {} + headers.update({'Content-Type': content_type}) + + return self.urlopen(method, url, body=body, headers=headers, + **urlopen_kw) + + # Deprecated: + + def get_url(self, url, fields=None, **urlopen_kw): + """ + .. deprecated:: 1.0 + Use :meth:`request` instead. + """ + return self.request_encode_url('GET', url, fields=fields, + **urlopen_kw) + + def post_url(self, url, fields=None, headers=None, **urlopen_kw): + """ + .. deprecated:: 1.0 + Use :meth:`request` instead. + """ + return self.request_encode_body('POST', url, fields=fields, + headers=headers, + **urlopen_kw) diff --git a/urllib3/response.py b/urllib3/response.py new file mode 100644 index 0000000..4dd431e --- /dev/null +++ b/urllib3/response.py @@ -0,0 +1,191 @@ +# urllib3/response.py +# Copyright 2008-2012 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# +# This module is part of urllib3 and is released under +# the MIT License: http://www.opensource.org/licenses/mit-license.php + +import gzip +import logging +import zlib + +from io import BytesIO + +from .exceptions import HTTPError +from .packages.six import string_types as basestring + + +log = logging.getLogger(__name__) + + +def decode_gzip(data): + gzipper = gzip.GzipFile(fileobj=BytesIO(data)) + return gzipper.read() + + +def decode_deflate(data): + try: + return zlib.decompress(data) + except zlib.error: + return zlib.decompress(data, -zlib.MAX_WBITS) + + +class HTTPResponse(object): + """ + HTTP Response container. + + Backwards-compatible to httplib's HTTPResponse but the response ``body`` is + loaded and decoded on-demand when the ``data`` property is accessed. 
+ + Extra parameters for behaviour not present in httplib.HTTPResponse: + + :param preload_content: + If True, the response's body will be preloaded during construction. + + :param decode_content: + If True, attempts to decode specific content-encoding's based on headers + (like 'gzip' and 'deflate') will be skipped and raw data will be used + instead. + + :param original_response: + When this HTTPResponse wrapper is generated from an httplib.HTTPResponse + object, it's convenient to include the original for debug purposes. It's + otherwise unused. + """ + + CONTENT_DECODERS = { + 'gzip': decode_gzip, + 'deflate': decode_deflate, + } + + def __init__(self, body='', headers=None, status=0, version=0, reason=None, + strict=0, preload_content=True, decode_content=True, + original_response=None, pool=None, connection=None): + self.headers = headers or {} + self.status = status + self.version = version + self.reason = reason + self.strict = strict + + self._decode_content = decode_content + self._body = body if body and isinstance(body, basestring) else None + self._fp = None + self._original_response = original_response + + self._pool = pool + self._connection = connection + + if hasattr(body, 'read'): + self._fp = body + + if preload_content and not self._body: + self._body = self.read(decode_content=decode_content) + + def get_redirect_location(self): + """ + Should we redirect and where to? + + :returns: Truthy redirect location string if we got a redirect status + code and valid location. ``None`` if redirect status and no + location. ``False`` if not a redirect status code. + """ + if self.status in [301, 302, 303, 307]: + return self.headers.get('location') + + return False + + def release_conn(self): + if not self._pool or not self._connection: + return + + self._pool._put_conn(self._connection) + self._connection = None + + @property + def data(self): + # For backwords-compat with earlier urllib3 0.4 and earlier. 
+ if self._body: + return self._body + + if self._fp: + return self.read(cache_content=True) + + def read(self, amt=None, decode_content=None, cache_content=False): + """ + Similar to :meth:`httplib.HTTPResponse.read`, but with two additional + parameters: ``decode_content`` and ``cache_content``. + + :param amt: + How much of the content to read. If specified, decoding and caching + is skipped because we can't decode partial content nor does it make + sense to cache partial content as the full response. + + :param decode_content: + If True, will attempt to decode the body based on the + 'content-encoding' header. (Overridden if ``amt`` is set.) + + :param cache_content: + If True, will save the returned data such that the same result is + returned despite of the state of the underlying file object. This + is useful if you want the ``.data`` property to continue working + after having ``.read()`` the file object. (Overridden if ``amt`` is + set.) + """ + content_encoding = self.headers.get('content-encoding') + decoder = self.CONTENT_DECODERS.get(content_encoding) + if decode_content is None: + decode_content = self._decode_content + + if self._fp is None: + return + + try: + if amt is None: + # cStringIO doesn't like amt=None + data = self._fp.read() + else: + return self._fp.read(amt) + + try: + if decode_content and decoder: + data = decoder(data) + except IOError: + raise HTTPError("Received response with content-encoding: %s, but " + "failed to decode it." % content_encoding) + + if cache_content: + self._body = data + + return data + + finally: + if self._original_response and self._original_response.isclosed(): + self.release_conn() + + @classmethod + def from_httplib(ResponseCls, r, **response_kw): + """ + Given an :class:`httplib.HTTPResponse` instance ``r``, return a + corresponding :class:`urllib3.response.HTTPResponse` object. + + Remaining parameters are passed to the HTTPResponse constructor, along + with ``original_response=r``. 
+ """ + + # HTTPResponse objects in Python 3 don't have a .strict attribute + strict = getattr(r, 'strict', 0) + return ResponseCls(body=r, + # In Python 3, the header keys are returned capitalised + headers=dict((k.lower(), v) for k,v in r.getheaders()), + status=r.status, + version=r.version, + reason=r.reason, + strict=strict, + original_response=r, + **response_kw) + + # Backwards-compatibility methods for httplib.HTTPResponse + def getheaders(self): + return self.headers + + def getheader(self, name, default=None): + return self.headers.get(name, default) -- cgit v1.2.3 From 77245469d4fbd400c6702cde35f9d9002540663e Mon Sep 17 00:00:00 2001 From: SVN-Git Migration Date: Thu, 8 Oct 2015 13:19:31 -0700 Subject: Imported Upstream version 1.3 --- CHANGES.rst | 18 ++++++ PKG-INFO | 21 ++++++- README.rst | 1 + dummyserver/server.py | 4 +- test/test_collections.py | 1 + test/test_connectionpool.py | 10 +--- test/test_filepost.py | 85 +++++++++++++++++++++++++++ urllib3.egg-info/PKG-INFO | 21 ++++++- urllib3.egg-info/SOURCES.txt | 2 + urllib3/__init__.py | 18 ++---- urllib3/connectionpool.py | 134 +++++------------------------------------- urllib3/filepost.py | 24 ++++++-- urllib3/poolmanager.py | 4 +- urllib3/request.py | 21 +------ urllib3/response.py | 15 ++++- urllib3/util.py | 136 +++++++++++++++++++++++++++++++++++++++++++ 16 files changed, 342 insertions(+), 173 deletions(-) create mode 100644 test/test_filepost.py create mode 100644 urllib3/util.py diff --git a/CHANGES.rst b/CHANGES.rst index d998db8..5afdd62 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,24 @@ Changes ======= +1.3 (2012-03-25) +++++++++++++++++ + +* Removed pre-1.0 deprecated API. + +* Refactored helpers into a ``urllib3.util`` submodule. + +* Fixed multipart encoding to support list-of-tuples for keys with multiple + values. (Issue #48) + +* Fixed multiple Set-Cookie headers in response not getting merged properly in + Python 3. (Issue #53) + +* AppEngine support with Py27. 
(Issue #61) + +* Minor ``encode_multipart_formdata`` fixes related to Python 3 strings vs + bytes. + 1.2.2 (2012-02-06) ++++++++++++++++++ diff --git a/PKG-INFO b/PKG-INFO index 2f0ab55..17d8a02 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 1.0 Name: urllib3 -Version: 1.2.2 +Version: 1.3 Summary: HTTP library with thread-safe connection pooling, file post, and more. Home-page: http://urllib3.readthedocs.org/ Author: Andrey Petrov @@ -16,6 +16,7 @@ Description: Highlights - Built-in redirection and retries (optional). - Supports gzip and deflate decoding. - Thread-safe and sanity-safe. + - Works with AppEngine, gevent, and eventlib. - Tested on Python 2.6+ and Python 3.2+, 99% unit test coverage. - Small and easy to understand codebase perfect for extending and building upon. For a more comprehensive solution, have a look at @@ -108,6 +109,24 @@ Description: Highlights Changes ======= + 1.3 (2012-03-25) + ++++++++++++++++ + + * Removed pre-1.0 deprecated API. + + * Refactored helpers into a ``urllib3.util`` submodule. + + * Fixed multipart encoding to support list-of-tuples for keys with multiple + values. (Issue #48) + + * Fixed multiple Set-Cookie headers in response not getting merged properly in + Python 3. (Issue #53) + + * AppEngine support with Py27. (Issue #61) + + * Minor ``encode_multipart_formdata`` fixes related to Python 3 strings vs + bytes. + 1.2.2 (2012-02-06) ++++++++++++++++++ diff --git a/README.rst b/README.rst index b376c81..f177852 100644 --- a/README.rst +++ b/README.rst @@ -8,6 +8,7 @@ Highlights - Built-in redirection and retries (optional). - Supports gzip and deflate decoding. - Thread-safe and sanity-safe. +- Works with AppEngine, gevent, and eventlib. - Tested on Python 2.6+ and Python 3.2+, 99% unit test coverage. - Small and easy to understand codebase perfect for extending and building upon. 
For a more comprehensive solution, have a look at diff --git a/dummyserver/server.py b/dummyserver/server.py index 529850f..6c0943c 100755 --- a/dummyserver/server.py +++ b/dummyserver/server.py @@ -83,7 +83,7 @@ class TornadoServerThread(threading.Thread): else: http_server = tornado.httpserver.HTTPServer(container) - http_server.listen(self.port) + http_server.listen(self.port, address=self.host) return http_server def run(self): @@ -106,7 +106,7 @@ if __name__ == '__main__': if len(sys.argv) > 1: url = sys.argv[1] - print("Starting WGI server at: %s" % url) + print("Starting WSGI server at: %s" % url) scheme, host, port = get_host(url) t = TornadoServerThread(scheme=scheme, host=host, port=port) diff --git a/test/test_collections.py b/test/test_collections.py index f8275e0..6cb5aca 100644 --- a/test/test_collections.py +++ b/test/test_collections.py @@ -4,6 +4,7 @@ from urllib3._collections import RecentlyUsedContainer as Container from urllib3.packages import six xrange = six.moves.xrange + class TestLRUContainer(unittest.TestCase): def test_maxsize(self): d = Container(5) diff --git a/test/test_connectionpool.py b/test/test_connectionpool.py index 4281d42..c32c6dc 100644 --- a/test/test_connectionpool.py +++ b/test/test_connectionpool.py @@ -1,11 +1,7 @@ import unittest -from urllib3.connectionpool import ( - connection_from_url, - get_host, - HTTPConnectionPool, - make_headers) - +from urllib3.connectionpool import connection_from_url, HTTPConnectionPool +from urllib3.util import get_host, make_headers from urllib3.exceptions import EmptyPoolError, LocationParseError @@ -105,7 +101,7 @@ class TestConnectionPool(unittest.TestCase): pass try: - pool.get_url('/', pool_timeout=0.01) + pool.request('GET', '/', pool_timeout=0.01) self.fail("Managed to get a connection without EmptyPoolError") except EmptyPoolError: pass diff --git a/test/test_filepost.py b/test/test_filepost.py new file mode 100644 index 0000000..c251778 --- /dev/null +++ b/test/test_filepost.py @@ 
-0,0 +1,85 @@ +import unittest + +from urllib3.filepost import encode_multipart_formdata, iter_fields +from urllib3.packages.six import b, u + + +BOUNDARY = '!! test boundary !!' + + +class TestIterfields(unittest.TestCase): + + def test_dict(self): + for fieldname, value in iter_fields(dict(a='b')): + self.assertEqual((fieldname, value), ('a', 'b')) + + self.assertEqual( + list(sorted(iter_fields(dict(a='b', c='d')))), + [('a', 'b'), ('c', 'd')]) + + def test_tuple_list(self): + for fieldname, value in iter_fields([('a', 'b')]): + self.assertEqual((fieldname, value), ('a', 'b')) + + self.assertEqual( + list(iter_fields([('a', 'b'), ('c', 'd')])), + [('a', 'b'), ('c', 'd')]) + + +class TestMultipartEncoding(unittest.TestCase): + + def test_input_datastructures(self): + fieldsets = [ + dict(k='v', k2='v2'), + [('k', 'v'), ('k2', 'v2')], + ] + + for fields in fieldsets: + encoded, _ = encode_multipart_formdata(fields, boundary=BOUNDARY) + self.assertEqual(encoded.count(b(BOUNDARY)), 3) + + + def test_field_encoding(self): + fieldsets = [ + [('k', 'v'), ('k2', 'v2')], + [('k', b'v'), (u('k2'), b'v2')], + [('k', b'v'), (u('k2'), 'v2')], + ] + + for fields in fieldsets: + encoded, content_type = encode_multipart_formdata(fields, boundary=BOUNDARY) + + self.assertEqual(encoded, + b'--' + b(BOUNDARY) + b'\r\n' + b'Content-Disposition: form-data; name="k"\r\n' + b'Content-Type: text/plain\r\n' + b'\r\n' + b'v\r\n' + b'--' + b(BOUNDARY) + b'\r\n' + b'Content-Disposition: form-data; name="k2"\r\n' + b'Content-Type: text/plain\r\n' + b'\r\n' + b'v2\r\n' + b'--' + b(BOUNDARY) + b'--\r\n' + , fields) + + self.assertEqual(content_type, + b'multipart/form-data; boundary=' + b(BOUNDARY)) + + + def test_filename(self): + fields = [('k', ('somename', b'v'))] + + encoded, content_type = encode_multipart_formdata(fields, boundary=BOUNDARY) + + self.assertEqual(encoded, + b'--' + b(BOUNDARY) + b'\r\n' + b'Content-Disposition: form-data; name="k"; filename="somename"\r\n' + 
b'Content-Type: application/octet-stream\r\n' + b'\r\n' + b'v\r\n' + b'--' + b(BOUNDARY) + b'--\r\n' + ) + + self.assertEqual(content_type, + b'multipart/form-data; boundary=' + b(BOUNDARY)) diff --git a/urllib3.egg-info/PKG-INFO b/urllib3.egg-info/PKG-INFO index 2f0ab55..17d8a02 100644 --- a/urllib3.egg-info/PKG-INFO +++ b/urllib3.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 1.0 Name: urllib3 -Version: 1.2.2 +Version: 1.3 Summary: HTTP library with thread-safe connection pooling, file post, and more. Home-page: http://urllib3.readthedocs.org/ Author: Andrey Petrov @@ -16,6 +16,7 @@ Description: Highlights - Built-in redirection and retries (optional). - Supports gzip and deflate decoding. - Thread-safe and sanity-safe. + - Works with AppEngine, gevent, and eventlib. - Tested on Python 2.6+ and Python 3.2+, 99% unit test coverage. - Small and easy to understand codebase perfect for extending and building upon. For a more comprehensive solution, have a look at @@ -108,6 +109,24 @@ Description: Highlights Changes ======= + 1.3 (2012-03-25) + ++++++++++++++++ + + * Removed pre-1.0 deprecated API. + + * Refactored helpers into a ``urllib3.util`` submodule. + + * Fixed multipart encoding to support list-of-tuples for keys with multiple + values. (Issue #48) + + * Fixed multiple Set-Cookie headers in response not getting merged properly in + Python 3. (Issue #53) + + * AppEngine support with Py27. (Issue #61) + + * Minor ``encode_multipart_formdata`` fixes related to Python 3 strings vs + bytes. 
+ 1.2.2 (2012-02-06) ++++++++++++++++++ diff --git a/urllib3.egg-info/SOURCES.txt b/urllib3.egg-info/SOURCES.txt index d7cbb3d..d79710b 100644 --- a/urllib3.egg-info/SOURCES.txt +++ b/urllib3.egg-info/SOURCES.txt @@ -14,6 +14,7 @@ test/__init__.py test/benchmark.py test/test_collections.py test/test_connectionpool.py +test/test_filepost.py test/test_poolmanager.py test/test_response.py urllib3/__init__.py @@ -24,6 +25,7 @@ urllib3/filepost.py urllib3/poolmanager.py urllib3/request.py urllib3/response.py +urllib3/util.py urllib3.egg-info/PKG-INFO urllib3.egg-info/SOURCES.txt urllib3.egg-info/dependency_links.txt diff --git a/urllib3/__init__.py b/urllib3/__init__.py index 2e9c663..2d6fece 100644 --- a/urllib3/__init__.py +++ b/urllib3/__init__.py @@ -10,26 +10,20 @@ urllib3 - Thread-safe connection pooling and re-using. __author__ = 'Andrey Petrov (andrey.petrov@shazow.net)' __license__ = 'MIT' -__version__ = '1.2.2' +__version__ = '1.3' from .connectionpool import ( HTTPConnectionPool, HTTPSConnectionPool, - connection_from_url, - get_host, - make_headers) - - -from .exceptions import ( - HTTPError, - MaxRetryError, - SSLError, - TimeoutError) + connection_from_url +) +from . import exceptions +from .filepost import encode_multipart_formdata from .poolmanager import PoolManager, ProxyManager, proxy_from_url from .response import HTTPResponse -from .filepost import encode_multipart_formdata +from .util import make_headers, get_host # Set default logging handler to avoid "No handler found" warnings. 
diff --git a/urllib3/connectionpool.py b/urllib3/connectionpool.py index 39e652e..c3cb3b1 100644 --- a/urllib3/connectionpool.py +++ b/urllib3/connectionpool.py @@ -7,15 +7,8 @@ import logging import socket -from base64 import b64encode from socket import error as SocketError, timeout as SocketTimeout -try: - from select import poll, POLLIN -except ImportError: # Doesn't exist on OSX and other platforms - from select import select - poll = False - try: # Python 3 from http.client import HTTPConnection, HTTPException from http.client import HTTP_PORT, HTTPS_PORT @@ -42,17 +35,16 @@ try: # Compiled with SSL? import ssl BaseSSLError = ssl.SSLError -except ImportError: +except (ImportError, AttributeError): pass -from .packages.ssl_match_hostname import match_hostname, CertificateError from .request import RequestMethods from .response import HTTPResponse +from .util import get_host, is_connection_dropped from .exceptions import ( EmptyPoolError, HostChangedError, - LocationParseError, MaxRetryError, SSLError, TimeoutError, @@ -61,6 +53,7 @@ from .exceptions import ( from .packages.ssl_match_hostname import match_hostname, CertificateError from .packages import six + xrange = six.moves.xrange log = logging.getLogger(__name__) @@ -72,6 +65,7 @@ port_by_scheme = { 'https': HTTPS_PORT, } + ## Connection objects (extension of httplib) class VerifiedHTTPSConnection(HTTPSConnection): @@ -107,6 +101,7 @@ class VerifiedHTTPSConnection(HTTPSConnection): if self.ca_certs: match_hostname(self.sock.getpeercert(), self.host) + ## Pool objects class ConnectionPool(object): @@ -212,7 +207,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): conn = self.pool.get(block=self.block, timeout=timeout) # If this is a persistent connection, check if it got disconnected - if conn and conn.sock and is_connection_dropped(conn): + if conn and is_connection_dropped(conn): log.info("Resetting dropped connection: %s" % self.host) conn.close() @@ -256,9 +251,13 @@ class 
HTTPConnectionPool(ConnectionPool, RequestMethods): timeout = self.timeout conn.timeout = timeout # This only does anything in Py26+ - conn.request(method, url, **httplib_request_kw) - conn.sock.settimeout(timeout) + + # Set timeout + sock = getattr(conn, 'sock', False) # AppEngine doesn't have sock attr. + if sock: + sock.settimeout(timeout) + httplib_response = conn.getresponse() log.debug("\"%s %s %s\" %s %s" % @@ -295,7 +294,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): .. note:: More commonly, it's appropriate to use a convenience method provided - by :class:`.RequestMethods`, such as :meth:`.request`. + by :class:`.RequestMethods`, such as :meth:`request`. .. note:: @@ -495,94 +494,6 @@ class HTTPSConnectionPool(HTTPConnectionPool): return connection -## Helpers - -def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, - basic_auth=None): - """ - Shortcuts for generating request headers. - - :param keep_alive: - If ``True``, adds 'connection: keep-alive' header. - - :param accept_encoding: - Can be a boolean, list, or string. - ``True`` translates to 'gzip,deflate'. - List will get joined by comma. - String will be used as provided. - - :param user_agent: - String representing the user-agent you want, such as - "python-urllib3/0.6" - - :param basic_auth: - Colon-separated username:password string for 'authorization: basic ...' - auth header. 
- - Example: :: - - >>> make_headers(keep_alive=True, user_agent="Batman/1.0") - {'connection': 'keep-alive', 'user-agent': 'Batman/1.0'} - >>> make_headers(accept_encoding=True) - {'accept-encoding': 'gzip,deflate'} - """ - headers = {} - if accept_encoding: - if isinstance(accept_encoding, str): - pass - elif isinstance(accept_encoding, list): - accept_encoding = ','.join(accept_encoding) - else: - accept_encoding = 'gzip,deflate' - headers['accept-encoding'] = accept_encoding - - if user_agent: - headers['user-agent'] = user_agent - - if keep_alive: - headers['connection'] = 'keep-alive' - - if basic_auth: - headers['authorization'] = 'Basic ' + \ - b64encode(six.b(basic_auth)).decode('utf-8') - - return headers - - -def get_host(url): - """ - Given a url, return its scheme, host and port (None if it's not there). - - For example: :: - - >>> get_host('http://google.com/mail/') - ('http', 'google.com', None) - >>> get_host('google.com:80') - ('http', 'google.com', 80) - """ - - # This code is actually similar to urlparse.urlsplit, but much - # simplified for our needs. - port = None - scheme = 'http' - - if '://' in url: - scheme, url = url.split('://', 1) - if '/' in url: - url, _path = url.split('/', 1) - if '@' in url: - _auth, url = url.split('@', 1) - if ':' in url: - url, port = url.split(':', 1) - - if not port.isdigit(): - raise LocationParseError("Failed to parse: %s") - - port = int(port) - - return scheme, url, port - - def connection_from_url(url, **kw): """ Given a url, return an :class:`.ConnectionPool` instance of its host. @@ -608,22 +519,3 @@ def connection_from_url(url, **kw): return HTTPSConnectionPool(host, port=port, **kw) else: return HTTPConnectionPool(host, port=port, **kw) - - -def is_connection_dropped(conn): - """ - Returns True if the connection is dropped and should be closed. - - :param conn: - ``HTTPConnection`` object. 
- """ - if not poll: # Platform-specific - return select([conn.sock], [], [], 0.0)[0] - - # This version is better on platforms that support it. - p = poll() - p.register(conn.sock, POLLIN) - for (fno, ev) in p.poll(0.0): - if fno == conn.sock.fileno(): - # Either data is buffered (bad), or the connection is dropped. - return True diff --git a/urllib3/filepost.py b/urllib3/filepost.py index e1ec8af..344a103 100644 --- a/urllib3/filepost.py +++ b/urllib3/filepost.py @@ -24,15 +24,29 @@ def get_content_type(filename): return mimetypes.guess_type(filename)[0] or 'application/octet-stream' +def iter_fields(fields): + """ + Iterate over fields. + + Supports list of (k, v) tuples and dicts. + """ + if isinstance(fields, dict): + return ((k, v) for k, v in six.iteritems(fields)) + + return ((k, v) for k, v in fields) + + def encode_multipart_formdata(fields, boundary=None): """ Encode a dictionary of ``fields`` using the multipart/form-data mime format. :param fields: - Dictionary of fields. The key is treated as the field name, and the - value as the body of the form-data. If the value is a tuple of two - elements, then the first element is treated as the filename of the - form-data section. + Dictionary of fields or list of (key, value) field tuples. The key is + treated as the field name, and the value as the body of the form-data + bytes. If the value is a tuple of two elements, then the first element + is treated as the filename of the form-data section. + + Field names and filenames must be unicode. 
:param boundary: If not specified, then a random boundary will be generated using @@ -42,7 +56,7 @@ def encode_multipart_formdata(fields, boundary=None): if boundary is None: boundary = choose_boundary() - for fieldname, value in six.iteritems(fields): + for fieldname, value in iter_fields(fields): body.write(b('--%s\r\n' % (boundary))) if isinstance(value, tuple): diff --git a/urllib3/poolmanager.py b/urllib3/poolmanager.py index d42f35b..310ea21 100644 --- a/urllib3/poolmanager.py +++ b/urllib3/poolmanager.py @@ -39,11 +39,11 @@ class PoolManager(RequestMethods): Example: :: - >>> manager = PoolManager() + >>> manager = PoolManager(num_pools=2) >>> r = manager.urlopen("http://google.com/") >>> r = manager.urlopen("http://google.com/mail") >>> r = manager.urlopen("http://yahoo.com/") - >>> len(r.pools) + >>> len(manager.pools) 2 """ diff --git a/urllib3/request.py b/urllib3/request.py index 5ea26a0..569ac96 100644 --- a/urllib3/request.py +++ b/urllib3/request.py @@ -44,7 +44,7 @@ class RequestMethods(object): def urlopen(self, method, url, body=None, headers=None, encode_multipart=True, multipart_boundary=None, - **kw): + **kw): # Abstract raise NotImplemented("Classes extending RequestMethods must implement " "their own ``urlopen`` method.") @@ -126,22 +126,3 @@ class RequestMethods(object): return self.urlopen(method, url, body=body, headers=headers, **urlopen_kw) - - # Deprecated: - - def get_url(self, url, fields=None, **urlopen_kw): - """ - .. deprecated:: 1.0 - Use :meth:`request` instead. - """ - return self.request_encode_url('GET', url, fields=fields, - **urlopen_kw) - - def post_url(self, url, fields=None, headers=None, **urlopen_kw): - """ - .. deprecated:: 1.0 - Use :meth:`request` instead. 
- """ - return self.request_encode_body('POST', url, fields=fields, - headers=headers, - **urlopen_kw) diff --git a/urllib3/response.py b/urllib3/response.py index 4dd431e..5fab824 100644 --- a/urllib3/response.py +++ b/urllib3/response.py @@ -171,11 +171,22 @@ class HTTPResponse(object): with ``original_response=r``. """ + # Normalize headers between different versions of Python + headers = {} + for k, v in r.getheaders(): + # Python 3: Header keys are returned capitalised + k = k.lower() + + has_value = headers.get(k) + if has_value: # Python 3: Repeating header keys are unmerged. + v = ', '.join([has_value, v]) + + headers[k] = v + # HTTPResponse objects in Python 3 don't have a .strict attribute strict = getattr(r, 'strict', 0) return ResponseCls(body=r, - # In Python 3, the header keys are returned capitalised - headers=dict((k.lower(), v) for k,v in r.getheaders()), + headers=headers, status=r.status, version=r.version, reason=r.reason, diff --git a/urllib3/util.py b/urllib3/util.py new file mode 100644 index 0000000..2684a2f --- /dev/null +++ b/urllib3/util.py @@ -0,0 +1,136 @@ +# urllib3/util.py +# Copyright 2008-2012 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# +# This module is part of urllib3 and is released under +# the MIT License: http://www.opensource.org/licenses/mit-license.php + + +from base64 import b64encode + +try: + from select import poll, POLLIN +except ImportError: # `poll` doesn't exist on OSX and other platforms + poll = False + try: + from select import select + except ImportError: # `select` doesn't exist on AppEngine. + select = False + +from .packages import six +from .exceptions import LocationParseError + + +def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, + basic_auth=None): + """ + Shortcuts for generating request headers. + + :param keep_alive: + If ``True``, adds 'connection: keep-alive' header. + + :param accept_encoding: + Can be a boolean, list, or string. 
+ ``True`` translates to 'gzip,deflate'. + List will get joined by comma. + String will be used as provided. + + :param user_agent: + String representing the user-agent you want, such as + "python-urllib3/0.6" + + :param basic_auth: + Colon-separated username:password string for 'authorization: basic ...' + auth header. + + Example: :: + + >>> make_headers(keep_alive=True, user_agent="Batman/1.0") + {'connection': 'keep-alive', 'user-agent': 'Batman/1.0'} + >>> make_headers(accept_encoding=True) + {'accept-encoding': 'gzip,deflate'} + """ + headers = {} + if accept_encoding: + if isinstance(accept_encoding, str): + pass + elif isinstance(accept_encoding, list): + accept_encoding = ','.join(accept_encoding) + else: + accept_encoding = 'gzip,deflate' + headers['accept-encoding'] = accept_encoding + + if user_agent: + headers['user-agent'] = user_agent + + if keep_alive: + headers['connection'] = 'keep-alive' + + if basic_auth: + headers['authorization'] = 'Basic ' + \ + b64encode(six.b(basic_auth)).decode('utf-8') + + return headers + + +def get_host(url): + """ + Given a url, return its scheme, host and port (None if it's not there). + + For example: :: + + >>> get_host('http://google.com/mail/') + ('http', 'google.com', None) + >>> get_host('google.com:80') + ('http', 'google.com', 80) + """ + + # This code is actually similar to urlparse.urlsplit, but much + # simplified for our needs. + port = None + scheme = 'http' + + if '://' in url: + scheme, url = url.split('://', 1) + if '/' in url: + url, _path = url.split('/', 1) + if '@' in url: + _auth, url = url.split('@', 1) + if ':' in url: + url, port = url.split(':', 1) + + if not port.isdigit(): + raise LocationParseError("Failed to parse: %s" % url) + + port = int(port) + + return scheme, url, port + + + +def is_connection_dropped(conn): + """ + Returns True if the connection is dropped and should be closed. + + :param conn: + ``HTTPConnection`` object. 
+ + Note: For platforms like AppEngine, this will always return ``False`` to + let the platform handle connection recycling transparently for us. + """ + sock = getattr(conn, 'sock', False) + if not sock: #Platform-specific: AppEngine + return False + + if not poll: # Platform-specific + if not select: #Platform-specific: AppEngine + return False + + return select([sock], [], [], 0.0)[0] + + # This version is better on platforms that support it. + p = poll() + p.register(sock, POLLIN) + for (fno, ev) in p.poll(0.0): + if fno == sock.fileno(): + # Either data is buffered (bad), or the connection is dropped. + return True -- cgit v1.2.3 From e5b66555b54a9854b340975471e8cdfa64e311f7 Mon Sep 17 00:00:00 2001 From: SVN-Git Migration Date: Thu, 8 Oct 2015 13:19:32 -0700 Subject: Imported Upstream version 1.5 --- CHANGES.rst | 37 +++ CONTRIBUTORS.txt | 6 + PKG-INFO | 43 +++- README.rst | 2 +- dummyserver/handlers.py | 10 +- setup.py | 2 +- test-requirements.txt | 2 +- test/__init__.py | 0 test/benchmark.py | 77 ------ test/test_collections.py | 55 +++-- test/test_connectionpool.py | 140 ++++++----- test/test_poolmanager.py | 24 ++ test/test_response.py | 4 +- test/test_util.py | 150 ++++++++++++ urllib3.egg-info/PKG-INFO | 43 +++- urllib3.egg-info/SOURCES.txt | 7 +- urllib3/__init__.py | 22 +- urllib3/_collections.py | 169 ++++++-------- urllib3/connectionpool.py | 99 +++++--- urllib3/contrib/__init__.py | 0 urllib3/contrib/ntlmpool.py | 120 ---------- urllib3/exceptions.py | 10 + urllib3/filepost.py | 13 +- .../packages/mimetools_choose_boundary/__init__.py | 47 ---- urllib3/packages/ordered_dict.py | 260 +++++++++++++++++++++ urllib3/poolmanager.py | 64 +++-- urllib3/response.py | 8 +- urllib3/util.py | 194 +++++++++++---- 28 files changed, 1061 insertions(+), 547 deletions(-) delete mode 100644 test/__init__.py delete mode 100644 test/benchmark.py create mode 100644 test/test_util.py delete mode 100644 urllib3/contrib/__init__.py delete mode 100644 
urllib3/contrib/ntlmpool.py delete mode 100644 urllib3/packages/mimetools_choose_boundary/__init__.py create mode 100644 urllib3/packages/ordered_dict.py diff --git a/CHANGES.rst b/CHANGES.rst index 5afdd62..a0cbdb3 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,43 @@ Changes ======= +1.5 (2012-08-02) +++++++++++++++++ + +* Added ``urllib3.add_stderr_logger()`` for quickly enabling STDERR debug + logging in urllib3. + +* Native full URL parsing (including auth, path, query, fragment) available in + ``urllib3.util.parse_url(url)``. + +* Built-in redirect will switch method to 'GET' if status code is 303. + (Issue #11) + +* ``urllib3.PoolManager`` strips the scheme and host before sending the request + uri. (Issue #8) + +* New ``urllib3.exceptions.DecodeError`` exception for when automatic decoding, + based on the Content-Type header, fails. + +* Fixed bug with pool depletion and leaking connections (Issue #76). Added + explicit connection closing on pool eviction. Added + ``urllib3.PoolManager.clear()``. + +* 99% -> 100% unit test coverage. + + +1.4 (2012-06-16) +++++++++++++++++ + +* Minor AppEngine-related fixes. + +* Switched from ``mimetools.choose_boundary`` to ``uuid.uuid4()``. + +* Improved url parsing. (Issue #73) + +* IPv6 url support. (Issue #72) + + 1.3 (2012-03-25) ++++++++++++++++ diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt index 37140ca..7dfbcaf 100644 --- a/CONTRIBUTORS.txt +++ b/CONTRIBUTORS.txt @@ -39,5 +39,11 @@ In chronological order: * brandon-rhodes * Design review, bugfixes, test coverage. 
+* studer + * IPv6 url support and test coverage + +* Shivaram Lingamneni + * Support for explicitly closing pooled connections + * [Your name or handle] <[email or website]> * [Brief summary of your changes] diff --git a/PKG-INFO b/PKG-INFO index 17d8a02..4e79ea8 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,6 +1,6 @@ -Metadata-Version: 1.0 +Metadata-Version: 1.1 Name: urllib3 -Version: 1.3 +Version: 1.5 Summary: HTTP library with thread-safe connection pooling, file post, and more. Home-page: http://urllib3.readthedocs.org/ Author: Andrey Petrov @@ -17,7 +17,7 @@ Description: Highlights - Supports gzip and deflate decoding. - Thread-safe and sanity-safe. - Works with AppEngine, gevent, and eventlib. - - Tested on Python 2.6+ and Python 3.2+, 99% unit test coverage. + - Tested on Python 2.6+ and Python 3.2+, 100% unit test coverage. - Small and easy to understand codebase perfect for extending and building upon. For a more comprehensive solution, have a look at `Requests `_ which is also powered by urllib3. @@ -109,6 +109,43 @@ Description: Highlights Changes ======= + 1.5 (2012-08-02) + ++++++++++++++++ + + * Added ``urllib3.add_stderr_logger()`` for quickly enabling STDERR debug + logging in urllib3. + + * Native full URL parsing (including auth, path, query, fragment) available in + ``urllib3.util.parse_url(url)``. + + * Built-in redirect will switch method to 'GET' if status code is 303. + (Issue #11) + + * ``urllib3.PoolManager`` strips the scheme and host before sending the request + uri. (Issue #8) + + * New ``urllib3.exceptions.DecodeError`` exception for when automatic decoding, + based on the Content-Type header, fails. + + * Fixed bug with pool depletion and leaking connections (Issue #76). Added + explicit connection closing on pool eviction. Added + ``urllib3.PoolManager.clear()``. + + * 99% -> 100% unit test coverage. + + + 1.4 (2012-06-16) + ++++++++++++++++ + + * Minor AppEngine-related fixes. 
+ + * Switched from ``mimetools.choose_boundary`` to ``uuid.uuid4()``. + + * Improved url parsing. (Issue #73) + + * IPv6 url support. (Issue #72) + + 1.3 (2012-03-25) ++++++++++++++++ diff --git a/README.rst b/README.rst index f177852..144df0e 100644 --- a/README.rst +++ b/README.rst @@ -9,7 +9,7 @@ Highlights - Supports gzip and deflate decoding. - Thread-safe and sanity-safe. - Works with AppEngine, gevent, and eventlib. -- Tested on Python 2.6+ and Python 3.2+, 99% unit test coverage. +- Tested on Python 2.6+ and Python 3.2+, 100% unit test coverage. - Small and easy to understand codebase perfect for extending and building upon. For a more comprehensive solution, have a look at `Requests `_ which is also powered by urllib3. diff --git a/dummyserver/handlers.py b/dummyserver/handlers.py index 3e32881..ca809ad 100644 --- a/dummyserver/handlers.py +++ b/dummyserver/handlers.py @@ -145,14 +145,20 @@ class TestingApp(WSGIHandler): data = b"hello, world!" encoding = request.headers.get('Accept-Encoding', '') headers = None - if 'gzip' in encoding: + if encoding == 'gzip': headers = [('Content-Encoding', 'gzip')] file_ = BytesIO() gzip.GzipFile('', mode='w', fileobj=file_).write(data) data = file_.getvalue() - elif 'deflate' in encoding: + elif encoding == 'deflate': headers = [('Content-Encoding', 'deflate')] data = zlib.compress(data) + elif encoding == 'garbage-gzip': + headers = [('Content-Encoding', 'gzip')] + data = 'garbage' + elif encoding == 'garbage-deflate': + headers = [('Content-Encoding', 'deflate')] + data = 'garbage' return Response(data, headers=headers) def shutdown(self, request): diff --git a/setup.py b/setup.py index f093f34..84d6e7f 100644 --- a/setup.py +++ b/setup.py @@ -45,7 +45,7 @@ setup(name='urllib3', url='http://urllib3.readthedocs.org/', license='MIT', packages=['urllib3', 'dummyserver', 'urllib3.packages', - 'urllib3.packages.ssl_match_hostname', 'urllib3.packages.mimetools_choose_boundary', + 'urllib3.packages.ssl_match_hostname', ], 
requires=requirements, tests_require=tests_requirements, diff --git a/test-requirements.txt b/test-requirements.txt index 568b0d4..e2d1579 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -1,2 +1,2 @@ nose -tornado +tornado==2.1.1 diff --git a/test/__init__.py b/test/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/test/benchmark.py b/test/benchmark.py deleted file mode 100644 index e7049c4..0000000 --- a/test/benchmark.py +++ /dev/null @@ -1,77 +0,0 @@ -#!/usr/bin/env python - -""" -Really simple rudimentary benchmark to compare ConnectionPool versus standard -urllib to demonstrate the usefulness of connection re-using. -""" -from __future__ import print_function - -import sys -import time -import urllib - -sys.path.append('../') -import urllib3 - - -# URLs to download. Doesn't matter as long as they're from the same host, so we -# can take advantage of connection re-using. -TO_DOWNLOAD = [ - 'http://code.google.com/apis/apps/', - 'http://code.google.com/apis/base/', - 'http://code.google.com/apis/blogger/', - 'http://code.google.com/apis/calendar/', - 'http://code.google.com/apis/codesearch/', - 'http://code.google.com/apis/contact/', - 'http://code.google.com/apis/books/', - 'http://code.google.com/apis/documents/', - 'http://code.google.com/apis/finance/', - 'http://code.google.com/apis/health/', - 'http://code.google.com/apis/notebook/', - 'http://code.google.com/apis/picasaweb/', - 'http://code.google.com/apis/spreadsheets/', - 'http://code.google.com/apis/webmastertools/', - 'http://code.google.com/apis/youtube/', -] - - -def urllib_get(url_list): - assert url_list - for url in url_list: - now = time.time() - r = urllib.urlopen(url) - elapsed = time.time() - now - print("Got in %0.3f: %s" % (elapsed, url)) - - -def pool_get(url_list): - assert url_list - pool = urllib3.connection_from_url(url_list[0]) - for url in url_list: - now = time.time() - r = pool.get_url(url) - elapsed = time.time() - now - print("Got in 
%0.3fs: %s" % (elapsed, url)) - - -if __name__ == '__main__': - print("Running pool_get ...") - now = time.time() - pool_get(TO_DOWNLOAD) - pool_elapsed = time.time() - now - - print("Running urllib_get ...") - now = time.time() - urllib_get(TO_DOWNLOAD) - urllib_elapsed = time.time() - now - - print("Completed pool_get in %0.3fs" % pool_elapsed) - print("Completed urllib_get in %0.3fs" % urllib_elapsed) - - -""" -Example results: - -Completed pool_get in 1.163s -Completed urllib_get in 2.318s -""" diff --git a/test/test_collections.py b/test/test_collections.py index 6cb5aca..098b31a 100644 --- a/test/test_collections.py +++ b/test/test_collections.py @@ -36,19 +36,7 @@ class TestLRUContainer(unittest.TestCase): d[5] = '5' # Check state - self.assertEqual(list(d.keys()), [0, 2, 3, 4, 5]) - - def test_pruning(self): - d = Container(5) - - for i in xrange(5): - d[i] = str(i) - - # Contend 2 entries for the most-used slot to balloon the heap - for i in xrange(100): - d.get(i % 2) - - self.assertTrue(len(d.access_log) <= d.CLEANUP_FACTOR * d._maxsize) + self.assertEqual(list(d.keys()), [2, 3, 4, 0, 5]) def test_same_key(self): d = Container(5) @@ -57,10 +45,7 @@ class TestLRUContainer(unittest.TestCase): d['foo'] = i self.assertEqual(list(d.keys()), ['foo']) - - d._prune_invalidated_entries() - - self.assertEqual(len(d.access_log), 1) + self.assertEqual(len(d), 1) def test_access_ordering(self): d = Container(5) @@ -68,13 +53,14 @@ class TestLRUContainer(unittest.TestCase): for i in xrange(10): d[i] = True - self.assertEqual(d._get_ordered_access_keys(), [9,8,7,6,5]) + # Keys should be ordered by access time + self.assertEqual(list(d.keys()), [5, 6, 7, 8, 9]) new_order = [7,8,6,9,5] - for k in reversed(new_order): + for k in new_order: d[k] - self.assertEqual(d._get_ordered_access_keys(), new_order) + self.assertEqual(list(d.keys()), new_order) def test_delete(self): d = Container(5) @@ -107,6 +93,35 @@ class TestLRUContainer(unittest.TestCase): 
self.assertRaises(KeyError, lambda: d[5]) + def test_disposal(self): + evicted_items = [] + + def dispose_func(arg): + # Save the evicted datum for inspection + evicted_items.append(arg) + + d = Container(5, dispose_func=dispose_func) + for i in xrange(5): + d[i] = i + self.assertEqual(list(d.keys()), list(xrange(5))) + self.assertEqual(evicted_items, []) # Nothing disposed + + d[5] = 5 + self.assertEqual(list(d.keys()), list(xrange(1, 6))) + self.assertEqual(evicted_items, [0]) + + del d[1] + self.assertEqual(evicted_items, [0, 1]) + + d.clear() + self.assertEqual(evicted_items, [0, 1, 2, 3, 4, 5]) + + def test_iter(self): + d = Container() + + with self.assertRaises(NotImplementedError): + for i in d: + self.fail("Iteration shouldn't be implemented.") if __name__ == '__main__': unittest.main() diff --git a/test/test_connectionpool.py b/test/test_connectionpool.py index c32c6dc..afc3098 100644 --- a/test/test_connectionpool.py +++ b/test/test_connectionpool.py @@ -1,30 +1,28 @@ import unittest from urllib3.connectionpool import connection_from_url, HTTPConnectionPool -from urllib3.util import get_host, make_headers -from urllib3.exceptions import EmptyPoolError, LocationParseError +from urllib3.packages.ssl_match_hostname import CertificateError +from urllib3.exceptions import ( + ClosedPoolError, + EmptyPoolError, + HostChangedError, + MaxRetryError, + SSLError, + TimeoutError, +) + +from socket import timeout as SocketTimeout +from ssl import SSLError as BaseSSLError + +try: # Python 3 + from queue import Empty + from http.client import HTTPException +except ImportError: + from Queue import Empty + from httplib import HTTPException class TestConnectionPool(unittest.TestCase): - def test_get_host(self): - url_host_map = { - 'http://google.com/mail': ('http', 'google.com', None), - 'http://google.com/mail/': ('http', 'google.com', None), - 'google.com/mail': ('http', 'google.com', None), - 'http://google.com/': ('http', 'google.com', None), - 'http://google.com': 
('http', 'google.com', None), - 'http://www.google.com': ('http', 'www.google.com', None), - 'http://mail.google.com': ('http', 'mail.google.com', None), - 'http://google.com:8000/mail/': ('http', 'google.com', 8000), - 'http://google.com:8000': ('http', 'google.com', 8000), - 'https://google.com': ('https', 'google.com', None), - 'https://google.com:8000': ('https', 'google.com', 8000), - 'http://user:password@127.0.0.1:1234': ('http', '127.0.0.1', 1234), - } - for url, expected_host in url_host_map.items(): - returned_host = get_host(url) - self.assertEquals(returned_host, expected_host) - def test_same_host(self): same_host = [ ('http://google.com/', '/'), @@ -50,45 +48,6 @@ class TestConnectionPool(unittest.TestCase): c = connection_from_url(a) self.assertFalse(c.is_same_host(b), "%s =? %s" % (a, b)) - def test_invalid_host(self): - # TODO: Add more tests - invalid_host = [ - 'http://google.com:foo', - ] - - for location in invalid_host: - self.assertRaises(LocationParseError, get_host, location) - - - def test_make_headers(self): - self.assertEqual( - make_headers(accept_encoding=True), - {'accept-encoding': 'gzip,deflate'}) - - self.assertEqual( - make_headers(accept_encoding='foo,bar'), - {'accept-encoding': 'foo,bar'}) - - self.assertEqual( - make_headers(accept_encoding=['foo', 'bar']), - {'accept-encoding': 'foo,bar'}) - - self.assertEqual( - make_headers(accept_encoding=True, user_agent='banana'), - {'accept-encoding': 'gzip,deflate', 'user-agent': 'banana'}) - - self.assertEqual( - make_headers(user_agent='banana'), - {'user-agent': 'banana'}) - - self.assertEqual( - make_headers(keep_alive=True), - {'connection': 'keep-alive'}) - - self.assertEqual( - make_headers(basic_auth='foo:bar'), - {'authorization': 'Basic Zm9vOmJhcg=='}) - def test_max_connections(self): pool = HTTPConnectionPool(host='localhost', maxsize=1, block=True) @@ -127,6 +86,67 @@ class TestConnectionPool(unittest.TestCase): str(EmptyPoolError(HTTPConnectionPool(host='localhost'), 
"Test.")), "HTTPConnectionPool(host='localhost', port=None): Test.") + def test_pool_size(self): + POOL_SIZE = 1 + pool = HTTPConnectionPool(host='localhost', maxsize=POOL_SIZE, block=True) + + def _raise(ex): + raise ex() + + def _test(exception, expect): + pool._make_request = lambda *args, **kwargs: _raise(exception) + with self.assertRaises(expect): + pool.request('GET', '/') + + self.assertEqual(pool.pool.qsize(), POOL_SIZE) + + #make sure that all of the exceptions return the connection to the pool + _test(Empty, TimeoutError) + _test(SocketTimeout, TimeoutError) + _test(BaseSSLError, SSLError) + _test(CertificateError, SSLError) + + # The pool should never be empty, and with these two exceptions being raised, + # a retry will be triggered, but that retry will fail, eventually raising + # MaxRetryError, not EmptyPoolError + # See: https://github.com/shazow/urllib3/issues/76 + pool._make_request = lambda *args, **kwargs: _raise(HTTPException) + with self.assertRaises(MaxRetryError): + pool.request('GET', '/', retries=1, pool_timeout=0.01) + self.assertEqual(pool.pool.qsize(), POOL_SIZE) + + def test_assert_same_host(self): + c = connection_from_url('http://google.com:80') + + with self.assertRaises(HostChangedError): + c.request('GET', 'http://yahoo.com:80', assert_same_host=True) + + def test_pool_close(self): + pool = connection_from_url('http://google.com:80') + + # Populate with some connections + conn1 = pool._get_conn() + conn2 = pool._get_conn() + conn3 = pool._get_conn() + pool._put_conn(conn1) + pool._put_conn(conn2) + + old_pool_queue = pool.pool + + pool.close() + self.assertEqual(pool.pool, None) + + with self.assertRaises(ClosedPoolError): + pool._get_conn() + + pool._put_conn(conn3) + + with self.assertRaises(ClosedPoolError): + pool._get_conn() + + with self.assertRaises(Empty): + old_pool_queue.get(block=False) + if __name__ == '__main__': unittest.main() diff --git a/test/test_poolmanager.py b/test/test_poolmanager.py index 12722f7..273abf9 
100644 --- a/test/test_poolmanager.py +++ b/test/test_poolmanager.py @@ -2,6 +2,7 @@ import unittest from urllib3.poolmanager import PoolManager from urllib3 import connection_from_url +from urllib3.exceptions import ClosedPoolError class TestPoolManager(unittest.TestCase): @@ -42,6 +43,29 @@ class TestPoolManager(unittest.TestCase): self.assertEqual(len(connections), 5) + def test_manager_clear(self): + p = PoolManager(5) + + conn_pool = p.connection_from_url('http://google.com') + self.assertEqual(len(p.pools), 1) + + conn = conn_pool._get_conn() + + p.clear() + self.assertEqual(len(p.pools), 0) + + with self.assertRaises(ClosedPoolError): + conn_pool._get_conn() + + conn_pool._put_conn(conn) + + with self.assertRaises(ClosedPoolError): + conn_pool._get_conn() + + self.assertEqual(len(p.pools), 0) + + + if __name__ == '__main__': unittest.main() diff --git a/test/test_response.py b/test/test_response.py index 0ef379c..964f677 100644 --- a/test/test_response.py +++ b/test/test_response.py @@ -1,9 +1,9 @@ import unittest -import zlib from io import BytesIO from urllib3.response import HTTPResponse +from urllib3.exceptions import DecodeError class TestLegacyResponse(unittest.TestCase): def test_getheaders(self): @@ -50,7 +50,7 @@ class TestResponse(unittest.TestCase): def test_decode_bad_data(self): fp = BytesIO(b'\x00' * 10) - self.assertRaises(zlib.error, HTTPResponse, fp, headers={ + self.assertRaises(DecodeError, HTTPResponse, fp, headers={ 'content-encoding': 'deflate' }) diff --git a/test/test_util.py b/test/test_util.py new file mode 100644 index 0000000..a989da6 --- /dev/null +++ b/test/test_util.py @@ -0,0 +1,150 @@ +import unittest +import logging + +from urllib3 import add_stderr_logger +from urllib3.util import get_host, make_headers, split_first, parse_url, Url +from urllib3.exceptions import LocationParseError + + +class TestUtil(unittest.TestCase): + def test_get_host(self): + url_host_map = { + # Hosts + 'http://google.com/mail': ('http', 
'google.com', None), + 'http://google.com/mail/': ('http', 'google.com', None), + 'google.com/mail': ('http', 'google.com', None), + 'http://google.com/': ('http', 'google.com', None), + 'http://google.com': ('http', 'google.com', None), + 'http://www.google.com': ('http', 'www.google.com', None), + 'http://mail.google.com': ('http', 'mail.google.com', None), + 'http://google.com:8000/mail/': ('http', 'google.com', 8000), + 'http://google.com:8000': ('http', 'google.com', 8000), + 'https://google.com': ('https', 'google.com', None), + 'https://google.com:8000': ('https', 'google.com', 8000), + 'http://user:password@127.0.0.1:1234': ('http', '127.0.0.1', 1234), + 'http://google.com/foo=http://bar:42/baz': ('http', 'google.com', None), + 'http://google.com?foo=http://bar:42/baz': ('http', 'google.com', None), + 'http://google.com#foo=http://bar:42/baz': ('http', 'google.com', None), + + # IPv4 + '173.194.35.7': ('http', '173.194.35.7', None), + 'http://173.194.35.7': ('http', '173.194.35.7', None), + 'http://173.194.35.7/test': ('http', '173.194.35.7', None), + 'http://173.194.35.7:80': ('http', '173.194.35.7', 80), + 'http://173.194.35.7:80/test': ('http', '173.194.35.7', 80), + + # IPv6 + '[2a00:1450:4001:c01::67]': ('http', '2a00:1450:4001:c01::67', None), + 'http://[2a00:1450:4001:c01::67]': ('http', '2a00:1450:4001:c01::67', None), + 'http://[2a00:1450:4001:c01::67]/test': ('http', '2a00:1450:4001:c01::67', None), + 'http://[2a00:1450:4001:c01::67]:80': ('http', '2a00:1450:4001:c01::67', 80), + 'http://[2a00:1450:4001:c01::67]:80/test': ('http', '2a00:1450:4001:c01::67', 80), + + # More IPv6 from http://www.ietf.org/rfc/rfc2732.txt + 'http://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:8000/index.html': ('http', 'FEDC:BA98:7654:3210:FEDC:BA98:7654:3210', 8000), + 'http://[1080:0:0:0:8:800:200C:417A]/index.html': ('http', '1080:0:0:0:8:800:200C:417A', None), + 'http://[3ffe:2a00:100:7031::1]': ('http', '3ffe:2a00:100:7031::1', None), + 
'http://[1080::8:800:200C:417A]/foo': ('http', '1080::8:800:200C:417A', None), + 'http://[::192.9.5.5]/ipng': ('http', '::192.9.5.5', None), + 'http://[::FFFF:129.144.52.38]:42/index.html': ('http', '::FFFF:129.144.52.38', 42), + 'http://[2010:836B:4179::836B:4179]': ('http', '2010:836B:4179::836B:4179', None), + } + for url, expected_host in url_host_map.items(): + returned_host = get_host(url) + self.assertEquals(returned_host, expected_host) + + def test_invalid_host(self): + # TODO: Add more tests + invalid_host = [ + 'http://google.com:foo', + ] + + for location in invalid_host: + self.assertRaises(LocationParseError, get_host, location) + + def test_parse_url(self): + url_host_map = { + 'http://google.com/mail': Url('http', host='google.com', path='/mail'), + 'http://google.com/mail/': Url('http', host='google.com', path='/mail/'), + 'google.com/mail': Url(host='google.com', path='/mail'), + 'http://google.com/': Url('http', host='google.com', path='/'), + 'http://google.com': Url('http', host='google.com'), + 'http://google.com?foo': Url('http', host='google.com', path='', query='foo'), + '': Url(), + '/': Url(path='/'), + '?': Url(path='', query=''), + '#': Url(path='', fragment=''), + '#?/!google.com/?foo#bar': Url(path='', fragment='?/!google.com/?foo#bar'), + '/foo': Url(path='/foo'), + '/foo?bar=baz': Url(path='/foo', query='bar=baz'), + '/foo?bar=baz#banana?apple/orange': Url(path='/foo', query='bar=baz', fragment='banana?apple/orange'), + } + for url, expected_url in url_host_map.items(): + returned_url = parse_url(url) + self.assertEquals(returned_url, expected_url) + + def test_request_uri(self): + url_host_map = { + 'http://google.com/mail': '/mail', + 'http://google.com/mail/': '/mail/', + 'http://google.com/': '/', + 'http://google.com': '/', + '': '/', + '/': '/', + '?': '/?', + '#': '/', + '/foo?bar=baz': '/foo?bar=baz', + } + for url, expected_request_uri in url_host_map.items(): + returned_url = parse_url(url) + 
self.assertEquals(returned_url.request_uri, expected_request_uri) + + def test_make_headers(self): + self.assertEqual( + make_headers(accept_encoding=True), + {'accept-encoding': 'gzip,deflate'}) + + self.assertEqual( + make_headers(accept_encoding='foo,bar'), + {'accept-encoding': 'foo,bar'}) + + self.assertEqual( + make_headers(accept_encoding=['foo', 'bar']), + {'accept-encoding': 'foo,bar'}) + + self.assertEqual( + make_headers(accept_encoding=True, user_agent='banana'), + {'accept-encoding': 'gzip,deflate', 'user-agent': 'banana'}) + + self.assertEqual( + make_headers(user_agent='banana'), + {'user-agent': 'banana'}) + + self.assertEqual( + make_headers(keep_alive=True), + {'connection': 'keep-alive'}) + + self.assertEqual( + make_headers(basic_auth='foo:bar'), + {'authorization': 'Basic Zm9vOmJhcg=='}) + + + def test_split_first(self): + test_cases = { + ('abcd', 'b'): ('a', 'cd', 'b'), + ('abcd', 'cb'): ('a', 'cd', 'b'), + ('abcd', ''): ('abcd', '', None), + ('abcd', 'a'): ('', 'bcd', 'a'), + ('abcd', 'ab'): ('', 'bcd', 'a'), + } + for input, expected in test_cases.items(): + output = split_first(*input) + self.assertEqual(output, expected) + + def test_add_stderr_logger(self): + handler = add_stderr_logger(level=logging.INFO) # Don't actually print debug + logger = logging.getLogger('urllib3') + self.assertTrue(handler in logger.handlers) + + logger.debug('Testing add_stderr_logger') + logger.removeHandler(handler) diff --git a/urllib3.egg-info/PKG-INFO b/urllib3.egg-info/PKG-INFO index 17d8a02..4e79ea8 100644 --- a/urllib3.egg-info/PKG-INFO +++ b/urllib3.egg-info/PKG-INFO @@ -1,6 +1,6 @@ -Metadata-Version: 1.0 +Metadata-Version: 1.1 Name: urllib3 -Version: 1.3 +Version: 1.5 Summary: HTTP library with thread-safe connection pooling, file post, and more. Home-page: http://urllib3.readthedocs.org/ Author: Andrey Petrov @@ -17,7 +17,7 @@ Description: Highlights - Supports gzip and deflate decoding. - Thread-safe and sanity-safe. 
- Works with AppEngine, gevent, and eventlib. - - Tested on Python 2.6+ and Python 3.2+, 99% unit test coverage. + - Tested on Python 2.6+ and Python 3.2+, 100% unit test coverage. - Small and easy to understand codebase perfect for extending and building upon. For a more comprehensive solution, have a look at `Requests `_ which is also powered by urllib3. @@ -109,6 +109,43 @@ Description: Highlights Changes ======= + 1.5 (2012-08-02) + ++++++++++++++++ + + * Added ``urllib3.add_stderr_logger()`` for quickly enabling STDERR debug + logging in urllib3. + + * Native full URL parsing (including auth, path, query, fragment) available in + ``urllib3.util.parse_url(url)``. + + * Built-in redirect will switch method to 'GET' if status code is 303. + (Issue #11) + + * ``urllib3.PoolManager`` strips the scheme and host before sending the request + uri. (Issue #8) + + * New ``urllib3.exceptions.DecodeError`` exception for when automatic decoding, + based on the Content-Type header, fails. + + * Fixed bug with pool depletion and leaking connections (Issue #76). Added + explicit connection closing on pool eviction. Added + ``urllib3.PoolManager.clear()``. + + * 99% -> 100% unit test coverage. + + + 1.4 (2012-06-16) + ++++++++++++++++ + + * Minor AppEngine-related fixes. + + * Switched from ``mimetools.choose_boundary`` to ``uuid.uuid4()``. + + * Improved url parsing. (Issue #73) + + * IPv6 url support. 
(Issue #72) + + 1.3 (2012-03-25) ++++++++++++++++ diff --git a/urllib3.egg-info/SOURCES.txt b/urllib3.egg-info/SOURCES.txt index d79710b..3155626 100644 --- a/urllib3.egg-info/SOURCES.txt +++ b/urllib3.egg-info/SOURCES.txt @@ -10,13 +10,12 @@ dummyserver/__init__.py dummyserver/handlers.py dummyserver/server.py dummyserver/testcase.py -test/__init__.py -test/benchmark.py test/test_collections.py test/test_connectionpool.py test/test_filepost.py test/test_poolmanager.py test/test_response.py +test/test_util.py urllib3/__init__.py urllib3/_collections.py urllib3/connectionpool.py @@ -30,9 +29,7 @@ urllib3.egg-info/PKG-INFO urllib3.egg-info/SOURCES.txt urllib3.egg-info/dependency_links.txt urllib3.egg-info/top_level.txt -urllib3/contrib/__init__.py -urllib3/contrib/ntlmpool.py urllib3/packages/__init__.py +urllib3/packages/ordered_dict.py urllib3/packages/six.py -urllib3/packages/mimetools_choose_boundary/__init__.py urllib3/packages/ssl_match_hostname/__init__.py \ No newline at end of file diff --git a/urllib3/__init__.py b/urllib3/__init__.py index 2d6fece..b552543 100644 --- a/urllib3/__init__.py +++ b/urllib3/__init__.py @@ -10,7 +10,7 @@ urllib3 - Thread-safe connection pooling and re-using. __author__ = 'Andrey Petrov (andrey.petrov@shazow.net)' __license__ = 'MIT' -__version__ = '1.3' +__version__ = '1.5' from .connectionpool import ( @@ -28,7 +28,7 @@ from .util import make_headers, get_host # Set default logging handler to avoid "No handler found" warnings. import logging -try: +try: # Python 2.7+ from logging import NullHandler except ImportError: class NullHandler(logging.Handler): @@ -37,6 +37,22 @@ except ImportError: logging.getLogger(__name__).addHandler(NullHandler()) +def add_stderr_logger(level=logging.DEBUG): + """ + Helper for quickly adding a StreamHandler to the logger. Useful for + debugging. + + Returns the handler after adding it. 
+ """ + # This method needs to be in this __init__.py to get the __name__ correct + # even if urllib3 is vendored within another package. + logger = logging.getLogger(__name__) + handler = logging.StreamHandler() + handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s')) + logger.addHandler(handler) + logger.setLevel(level) + logger.debug('Added an stderr logging handler to logger: %s' % __name__) + return handler + # ... Clean up. -del logging del NullHandler diff --git a/urllib3/_collections.py b/urllib3/_collections.py index 3cef081..a052b1d 100644 --- a/urllib3/_collections.py +++ b/urllib3/_collections.py @@ -4,128 +4,91 @@ # This module is part of urllib3 and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php -from collections import deque +from collections import MutableMapping +from threading import Lock -from threading import RLock +try: # Python 2.7+ + from collections import OrderedDict +except ImportError: + from .packages.ordered_dict import OrderedDict -__all__ = ['RecentlyUsedContainer'] +__all__ = ['RecentlyUsedContainer'] -class AccessEntry(object): - __slots__ = ('key', 'is_valid') - def __init__(self, key, is_valid=True): - self.key = key - self.is_valid = is_valid +_Null = object() -class RecentlyUsedContainer(dict): - """ - Provides a dict-like that maintains up to ``maxsize`` keys while throwing - away the least-recently-used keys beyond ``maxsize``. +class RecentlyUsedContainer(MutableMapping): """ + Provides a thread-safe dict-like container which maintains up to + ``maxsize`` keys while throwing away the least-recently-used keys beyond + ``maxsize``. - # If len(self.access_log) exceeds self._maxsize * CLEANUP_FACTOR, then we - # will attempt to cleanup the invalidated entries in the access_log - # datastructure during the next 'get' operation. 
- CLEANUP_FACTOR = 10 - - def __init__(self, maxsize=10): - self._maxsize = maxsize - - self._container = {} - - # We use a deque to to store our keys ordered by the last access. - self.access_log = deque() - self.access_log_lock = RLock() - - # We look up the access log entry by the key to invalidate it so we can - # insert a new authorative entry at the head without having to dig and - # find the old entry for removal immediately. - self.access_lookup = {} - - # Trigger a heap cleanup when we get past this size - self.access_log_limit = maxsize * self.CLEANUP_FACTOR - - def _invalidate_entry(self, key): - "If exists: Invalidate old entry and return it." - old_entry = self.access_lookup.get(key) - if old_entry: - old_entry.is_valid = False + :param maxsize: + Maximum number of recent elements to retain. - return old_entry - - def _push_entry(self, key): - "Push entry onto our access log, invalidate the old entry if exists." - self._invalidate_entry(key) - - new_entry = AccessEntry(key) - self.access_lookup[key] = new_entry - - self.access_log_lock.acquire() - self.access_log.appendleft(new_entry) - self.access_log_lock.release() - - def _prune_entries(self, num): - "Pop entries from our access log until we popped ``num`` valid ones." - while num > 0: - self.access_log_lock.acquire() - p = self.access_log.pop() - self.access_log_lock.release() - - if not p.is_valid: - continue # Invalidated entry, skip - - dict.pop(self, p.key, None) - self.access_lookup.pop(p.key, None) - num -= 1 + :param dispose_func: + Every time an item is evicted from the container, + ``dispose_func(value)`` is called. Callback which will get called + """ - def _prune_invalidated_entries(self): - "Rebuild our access_log without the invalidated entries." 
- self.access_log_lock.acquire() - self.access_log = deque(e for e in self.access_log if e.is_valid) - self.access_log_lock.release() + ContainerCls = OrderedDict - def _get_ordered_access_keys(self): - "Return ordered access keys for inspection. Used for testing." - self.access_log_lock.acquire() - r = [e.key for e in self.access_log if e.is_valid] - self.access_log_lock.release() + def __init__(self, maxsize=10, dispose_func=None): + self._maxsize = maxsize + self.dispose_func = dispose_func - return r + self._container = self.ContainerCls() + self._lock = Lock() def __getitem__(self, key): - item = dict.get(self, key) + # Re-insert the item, moving it to the end of the eviction line. + with self._lock: + item = self._container.pop(key) + self._container[key] = item + return item + + def __setitem__(self, key, value): + evicted_value = _Null + with self._lock: + # Possibly evict the existing value of 'key' + evicted_value = self._container.get(key, _Null) + self._container[key] = value + + # If we didn't evict an existing value, we might have to evict the + # least recently used item from the beginning of the container. + if len(self._container) > self._maxsize: + _key, evicted_value = self._container.popitem(last=False) + + if self.dispose_func and evicted_value is not _Null: + self.dispose_func(evicted_value) - if not item: - raise KeyError(key) + def __delitem__(self, key): + with self._lock: + value = self._container.pop(key) - # Insert new entry with new high priority, also implicitly invalidates - # the old entry. - self._push_entry(key) + if self.dispose_func: + self.dispose_func(value) - if len(self.access_log) > self.access_log_limit: - # Heap is getting too big, try to clean up any tailing invalidated - # entries. 
- self._prune_invalidated_entries() + def __len__(self): + with self._lock: + return len(self._container) - return item + def __iter__(self): + raise NotImplementedError('Iteration over this class is unlikely to be threadsafe.') - def __setitem__(self, key, item): - # Add item to our container and access log - dict.__setitem__(self, key, item) - self._push_entry(key) + def clear(self): + with self._lock: + # Copy pointers to all values, then wipe the mapping + # under Python 2, this copies the list of values twice :-| + values = list(self._container.values()) + self._container.clear() - # Discard invalid and excess entries - self._prune_entries(len(self) - self._maxsize) + if self.dispose_func: + for value in values: + self.dispose_func(value) - def __delitem__(self, key): - self._invalidate_entry(key) - self.access_lookup.pop(key, None) - dict.__delitem__(self, key) - - def get(self, key, default=None): - try: - return self[key] - except KeyError: - return default + def keys(self): + with self._lock: + return self._container.keys() diff --git a/urllib3/connectionpool.py b/urllib3/connectionpool.py index c3cb3b1..97da544 100644 --- a/urllib3/connectionpool.py +++ b/urllib3/connectionpool.py @@ -7,27 +7,27 @@ import logging import socket -from socket import error as SocketError, timeout as SocketTimeout +from socket import timeout as SocketTimeout -try: # Python 3 +try: # Python 3 from http.client import HTTPConnection, HTTPException from http.client import HTTP_PORT, HTTPS_PORT except ImportError: from httplib import HTTPConnection, HTTPException from httplib import HTTP_PORT, HTTPS_PORT -try: # Python 3 +try: # Python 3 from queue import LifoQueue, Empty, Full except ImportError: from Queue import LifoQueue, Empty, Full -try: # Compiled with SSL? +try: # Compiled with SSL? 
HTTPSConnection = object BaseSSLError = None ssl = None - try: # Python 3 + try: # Python 3 from http.client import HTTPSConnection except ImportError: from httplib import HTTPSConnection @@ -35,7 +35,7 @@ try: # Compiled with SSL? import ssl BaseSSLError = ssl.SSLError -except (ImportError, AttributeError): +except (ImportError, AttributeError): # Platform-specific: No SSL. pass @@ -43,6 +43,7 @@ from .request import RequestMethods from .response import HTTPResponse from .util import get_host, is_connection_dropped from .exceptions import ( + ClosedPoolError, EmptyPoolError, HostChangedError, MaxRetryError, @@ -206,10 +207,8 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): try: conn = self.pool.get(block=self.block, timeout=timeout) - # If this is a persistent connection, check if it got disconnected - if conn and is_connection_dropped(conn): - log.info("Resetting dropped connection: %s" % self.host) - conn.close() + except AttributeError: # self.pool is None + raise ClosedPoolError(self, "Pool is closed.") except Empty: if self.block: @@ -218,6 +217,11 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): "connections are allowed.") pass # Oh well, we'll create a new connection then + # If this is a persistent connection, check if it got disconnected + if conn and is_connection_dropped(conn): + log.info("Resetting dropped connection: %s" % self.host) + conn.close() + return conn or self._new_conn() def _put_conn(self, conn): @@ -228,17 +232,26 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): Connection object for the current host and port as returned by :meth:`._new_conn` or :meth:`._get_conn`. - If the pool is already full, the connection is discarded because we - exceeded maxsize. If connections are discarded frequently, then maxsize - should be increased. + If the pool is already full, the connection is closed and discarded + because we exceeded maxsize. If connections are discarded frequently, + then maxsize should be increased. 
+ + If the pool is closed, then the connection will be closed and discarded. """ try: self.pool.put(conn, block=False) + return # Everything is dandy, done. + except AttributeError: + # self.pool is None. + pass except Full: # This should never happen if self.block == True log.warning("HttpConnectionPool is full, discarding connection: %s" % self.host) + # Connection never got put back into the pool, close it. + conn.close() + def _make_request(self, conn, method, url, timeout=_Default, **httplib_request_kw): """ @@ -258,21 +271,42 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): if sock: sock.settimeout(timeout) - httplib_response = conn.getresponse() - - log.debug("\"%s %s %s\" %s %s" % - (method, url, - conn._http_vsn_str, # pylint: disable-msg=W0212 - httplib_response.status, httplib_response.length)) + try: # Python 2.7+, use buffering of HTTP responses + httplib_response = conn.getresponse(buffering=True) + except TypeError: # Python 2.6 and older + httplib_response = conn.getresponse() + # AppEngine doesn't have a version attr. + http_version = getattr(conn, '_http_vsn_str', 'HTTP/?') + log.debug("\"%s %s %s\" %s %s" % (method, url, http_version, + httplib_response.status, + httplib_response.length)) return httplib_response + def close(self): + """ + Close all pooled connections and disable the pool. + """ + # Disable access to the pool + old_pool, self.pool = self.pool, None + + try: + while True: + conn = old_pool.get(block=False) + if conn: + conn.close() + + except Empty: + pass # Done. def is_same_host(self, url): """ Check if the given ``url`` is a member of the same host as this connection pool. """ + if url.startswith('/'): + return True + # TODO: Add optional support for socket.gethostbyname checking. scheme, host, port = get_host(url) @@ -280,8 +314,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): # Use explicit default port for comparison when none is given. 
port = port_by_scheme.get(scheme) - return (url.startswith('/') or - (scheme, host, port) == (self.scheme, self.host, self.port)) + return (scheme, host, port) == (self.scheme, self.host, self.port) def urlopen(self, method, url, body=None, headers=None, retries=3, redirect=True, assert_same_host=True, timeout=_Default, @@ -320,8 +353,8 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): Number of retries to allow before raising a MaxRetryError exception. :param redirect: - Automatically handle redirects (status codes 301, 302, 303, 307), - each redirect counts as a retry. + If True, automatically handle redirects (status codes 301, 302, + 303, 307). Each redirect counts as a retry. :param assert_same_host: If ``True``, will make sure that the host of the pool requests is @@ -374,7 +407,6 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): try: # Request a connection from the queue - # (Could raise SocketError: Bad file descriptor) conn = self._get_conn(timeout=pool_timeout) # Make the request on the httplib connection object @@ -417,29 +449,38 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): # Name mismatch raise SSLError(e) - except (HTTPException, SocketError) as e: + except HTTPException as e: # Connection broken, discard. It will be replaced next _get_conn(). conn = None # This is necessary so we can access e below err = e finally: - if conn and release_conn: - # Put the connection back to be reused + if release_conn: + # Put the connection back to be reused. If the connection is + # expired then it will be None, which will get replaced with a + # fresh connection during _get_conn. 
self._put_conn(conn) if not conn: + # Try again log.warn("Retrying (%d attempts remain) after connection " "broken by '%r': %s" % (retries, err, url)) return self.urlopen(method, url, body, headers, retries - 1, - redirect, assert_same_host) # Try again + redirect, assert_same_host, + timeout=timeout, pool_timeout=pool_timeout, + release_conn=release_conn, **response_kw) # Handle redirect? redirect_location = redirect and response.get_redirect_location() if redirect_location: + if response.status == 303: + method = 'GET' log.info("Redirecting %s -> %s" % (url, redirect_location)) return self.urlopen(method, redirect_location, body, headers, - retries - 1, redirect, assert_same_host) + retries - 1, redirect, assert_same_host, + timeout=timeout, pool_timeout=pool_timeout, + release_conn=release_conn, **response_kw) return response diff --git a/urllib3/contrib/__init__.py b/urllib3/contrib/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/urllib3/contrib/ntlmpool.py b/urllib3/contrib/ntlmpool.py deleted file mode 100644 index bb41fd1..0000000 --- a/urllib3/contrib/ntlmpool.py +++ /dev/null @@ -1,120 +0,0 @@ -# urllib3/contrib/ntlmpool.py -# Copyright 2008-2012 Andrey Petrov and contributors (see CONTRIBUTORS.txt) -# -# This module is part of urllib3 and is released under -# the MIT License: http://www.opensource.org/licenses/mit-license.php - -""" -NTLM authenticating pool, contributed by erikcederstran - -Issue #10, see: http://code.google.com/p/urllib3/issues/detail?id=10 -""" - -try: - from http.client import HTTPSConnection -except ImportError: - from httplib import HTTPSConnection -from logging import getLogger -from ntlm import ntlm - -from urllib3 import HTTPSConnectionPool - - -log = getLogger(__name__) - - -class NTLMConnectionPool(HTTPSConnectionPool): - """ - Implements an NTLM authentication version of an urllib3 connection pool - """ - - scheme = 'https' - - def __init__(self, user, pw, authurl, *args, **kwargs): - """ - authurl is 
a random URL on the server that is protected by NTLM. - user is the Windows user, probably in the DOMAIN\username format. - pw is the password for the user. - """ - super(NTLMConnectionPool, self).__init__(*args, **kwargs) - self.authurl = authurl - self.rawuser = user - user_parts = user.split('\\', 1) - self.domain = user_parts[0].upper() - self.user = user_parts[1] - self.pw = pw - - def _new_conn(self): - # Performs the NTLM handshake that secures the connection. The socket - # must be kept open while requests are performed. - self.num_connections += 1 - log.debug('Starting NTLM HTTPS connection no. %d: https://%s%s' % - (self.num_connections, self.host, self.authurl)) - - headers = {} - headers['Connection'] = 'Keep-Alive' - req_header = 'Authorization' - resp_header = 'www-authenticate' - - conn = HTTPSConnection(host=self.host, port=self.port) - - # Send negotiation message - headers[req_header] = ( - 'NTLM %s' % ntlm.create_NTLM_NEGOTIATE_MESSAGE(self.rawuser)) - log.debug('Request headers: %s' % headers) - conn.request('GET', self.authurl, None, headers) - res = conn.getresponse() - reshdr = dict(res.getheaders()) - log.debug('Response status: %s %s' % (res.status, res.reason)) - log.debug('Response headers: %s' % reshdr) - log.debug('Response data: %s [...]' % res.read(100)) - - # Remove the reference to the socket, so that it can not be closed by - # the response object (we want to keep the socket open) - res.fp = None - - # Server should respond with a challenge message - auth_header_values = reshdr[resp_header].split(', ') - auth_header_value = None - for s in auth_header_values: - if s[:5] == 'NTLM ': - auth_header_value = s[5:] - if auth_header_value is None: - raise Exception('Unexpected %s response header: %s' % - (resp_header, reshdr[resp_header])) - - # Send authentication message - ServerChallenge, NegotiateFlags = \ - ntlm.parse_NTLM_CHALLENGE_MESSAGE(auth_header_value) - auth_msg = ntlm.create_NTLM_AUTHENTICATE_MESSAGE(ServerChallenge, - 
self.user, - self.domain, - self.pw, - NegotiateFlags) - headers[req_header] = 'NTLM %s' % auth_msg - log.debug('Request headers: %s' % headers) - conn.request('GET', self.authurl, None, headers) - res = conn.getresponse() - log.debug('Response status: %s %s' % (res.status, res.reason)) - log.debug('Response headers: %s' % dict(res.getheaders())) - log.debug('Response data: %s [...]' % res.read()[:100]) - if res.status != 200: - if res.status == 401: - raise Exception('Server rejected request: wrong ' - 'username or password') - raise Exception('Wrong server response: %s %s' % - (res.status, res.reason)) - - res.fp = None - log.debug('Connection established') - return conn - - def urlopen(self, method, url, body=None, headers=None, retries=3, - redirect=True, assert_same_host=True): - if headers is None: - headers = {} - headers['Connection'] = 'Keep-Alive' - return super(NTLMConnectionPool, self).urlopen(method, url, body, - headers, retries, - redirect, - assert_same_host) diff --git a/urllib3/exceptions.py b/urllib3/exceptions.py index 15c9699..99ebb67 100644 --- a/urllib3/exceptions.py +++ b/urllib3/exceptions.py @@ -24,6 +24,11 @@ class SSLError(HTTPError): pass +class DecodeError(HTTPError): + "Raised when automatic decoding based on Content-Type fails." + pass + + ## Leaf Exceptions class MaxRetryError(PoolError): @@ -57,6 +62,11 @@ class EmptyPoolError(PoolError): pass +class ClosedPoolError(PoolError): + "Raised when a request enters a pool after the pool has been closed." + pass + + class LocationParseError(ValueError, HTTPError): "Raised when get_host or similar fails to parse the URL input." 
diff --git a/urllib3/filepost.py b/urllib3/filepost.py index 344a103..e679b93 100644 --- a/urllib3/filepost.py +++ b/urllib3/filepost.py @@ -7,11 +7,7 @@ import codecs import mimetypes -try: - from mimetools import choose_boundary -except ImportError: - from .packages.mimetools_choose_boundary import choose_boundary - +from uuid import uuid4 from io import BytesIO from .packages import six @@ -20,6 +16,13 @@ from .packages.six import b writer = codecs.lookup('utf-8')[3] +def choose_boundary(): + """ + Our embarassingly-simple replacement for mimetools.choose_boundary. + """ + return uuid4().hex + + def get_content_type(filename): return mimetypes.guess_type(filename)[0] or 'application/octet-stream' diff --git a/urllib3/packages/mimetools_choose_boundary/__init__.py b/urllib3/packages/mimetools_choose_boundary/__init__.py deleted file mode 100644 index a0109ab..0000000 --- a/urllib3/packages/mimetools_choose_boundary/__init__.py +++ /dev/null @@ -1,47 +0,0 @@ -"""The function mimetools.choose_boundary() from Python 2.7, which seems to -have disappeared in Python 3 (although email.generator._make_boundary() might -work as a replacement?). - -Tweaked to use lock from threading rather than thread. -""" -import os -from threading import Lock -_counter_lock = Lock() - -_counter = 0 -def _get_next_counter(): - global _counter - with _counter_lock: - _counter += 1 - return _counter - -_prefix = None - -def choose_boundary(): - """Return a string usable as a multipart boundary. - - The string chosen is unique within a single program run, and - incorporates the user id (if available), process id (if available), - and current time. So it's very unlikely the returned string appears - in message text, but there's no guarantee. 
- - The boundary contains dots so you have to quote it in the header.""" - - global _prefix - import time - if _prefix is None: - import socket - try: - hostid = socket.gethostbyname(socket.gethostname()) - except socket.gaierror: - hostid = '127.0.0.1' - try: - uid = repr(os.getuid()) - except AttributeError: - uid = '1' - try: - pid = repr(os.getpid()) - except AttributeError: - pid = '1' - _prefix = hostid + '.' + uid + '.' + pid - return "%s.%.3f.%d" % (_prefix, time.time(), _get_next_counter()) diff --git a/urllib3/packages/ordered_dict.py b/urllib3/packages/ordered_dict.py new file mode 100644 index 0000000..7f8ee15 --- /dev/null +++ b/urllib3/packages/ordered_dict.py @@ -0,0 +1,260 @@ +# Backport of OrderedDict() class that runs on Python 2.4, 2.5, 2.6, 2.7 and pypy. +# Passes Python2.7's test suite and incorporates all the latest updates. +# Copyright 2009 Raymond Hettinger, released under the MIT License. +# http://code.activestate.com/recipes/576693/ + +try: + from thread import get_ident as _get_ident +except ImportError: + from dummy_thread import get_ident as _get_ident + +try: + from _abcoll import KeysView, ValuesView, ItemsView +except ImportError: + pass + + +class OrderedDict(dict): + 'Dictionary that remembers insertion order' + # An inherited dict maps keys to values. + # The inherited dict provides __getitem__, __len__, __contains__, and get. + # The remaining methods are order-aware. + # Big-O running times for all methods are the same as for regular dictionaries. + + # The internal self.__map dictionary maps keys to links in a doubly linked list. + # The circular doubly linked list starts and ends with a sentinel element. + # The sentinel element never gets deleted (this simplifies the algorithm). + # Each link is stored as a list of length three: [PREV, NEXT, KEY]. + + def __init__(self, *args, **kwds): + '''Initialize an ordered dictionary. 
Signature is the same as for + regular dictionaries, but keyword arguments are not recommended + because their insertion order is arbitrary. + + ''' + if len(args) > 1: + raise TypeError('expected at most 1 arguments, got %d' % len(args)) + try: + self.__root + except AttributeError: + self.__root = root = [] # sentinel node + root[:] = [root, root, None] + self.__map = {} + self.__update(*args, **kwds) + + def __setitem__(self, key, value, dict_setitem=dict.__setitem__): + 'od.__setitem__(i, y) <==> od[i]=y' + # Setting a new item creates a new link which goes at the end of the linked + # list, and the inherited dictionary is updated with the new key/value pair. + if key not in self: + root = self.__root + last = root[0] + last[1] = root[0] = self.__map[key] = [last, root, key] + dict_setitem(self, key, value) + + def __delitem__(self, key, dict_delitem=dict.__delitem__): + 'od.__delitem__(y) <==> del od[y]' + # Deleting an existing item uses self.__map to find the link which is + # then removed by updating the links in the predecessor and successor nodes. + dict_delitem(self, key) + link_prev, link_next, key = self.__map.pop(key) + link_prev[1] = link_next + link_next[0] = link_prev + + def __iter__(self): + 'od.__iter__() <==> iter(od)' + root = self.__root + curr = root[1] + while curr is not root: + yield curr[2] + curr = curr[1] + + def __reversed__(self): + 'od.__reversed__() <==> reversed(od)' + root = self.__root + curr = root[0] + while curr is not root: + yield curr[2] + curr = curr[0] + + def clear(self): + 'od.clear() -> None. Remove all items from od.' + try: + for node in self.__map.itervalues(): + del node[:] + root = self.__root + root[:] = [root, root, None] + self.__map.clear() + except AttributeError: + pass + dict.clear(self) + + def popitem(self, last=True): + '''od.popitem() -> (k, v), return and remove a (key, value) pair. + Pairs are returned in LIFO order if last is true or FIFO order if false. 
+ + ''' + if not self: + raise KeyError('dictionary is empty') + root = self.__root + if last: + link = root[0] + link_prev = link[0] + link_prev[1] = root + root[0] = link_prev + else: + link = root[1] + link_next = link[1] + root[1] = link_next + link_next[0] = root + key = link[2] + del self.__map[key] + value = dict.pop(self, key) + return key, value + + # -- the following methods do not depend on the internal structure -- + + def keys(self): + 'od.keys() -> list of keys in od' + return list(self) + + def values(self): + 'od.values() -> list of values in od' + return [self[key] for key in self] + + def items(self): + 'od.items() -> list of (key, value) pairs in od' + return [(key, self[key]) for key in self] + + def iterkeys(self): + 'od.iterkeys() -> an iterator over the keys in od' + return iter(self) + + def itervalues(self): + 'od.itervalues -> an iterator over the values in od' + for k in self: + yield self[k] + + def iteritems(self): + 'od.iteritems -> an iterator over the (key, value) items in od' + for k in self: + yield (k, self[k]) + + def update(*args, **kwds): + '''od.update(E, **F) -> None. Update od from dict/iterable E and F. 
+ + If E is a dict instance, does: for k in E: od[k] = E[k] + If E has a .keys() method, does: for k in E.keys(): od[k] = E[k] + Or if E is an iterable of items, does: for k, v in E: od[k] = v + In either case, this is followed by: for k, v in F.items(): od[k] = v + + ''' + if len(args) > 2: + raise TypeError('update() takes at most 2 positional ' + 'arguments (%d given)' % (len(args),)) + elif not args: + raise TypeError('update() takes at least 1 argument (0 given)') + self = args[0] + # Make progressively weaker assumptions about "other" + other = () + if len(args) == 2: + other = args[1] + if isinstance(other, dict): + for key in other: + self[key] = other[key] + elif hasattr(other, 'keys'): + for key in other.keys(): + self[key] = other[key] + else: + for key, value in other: + self[key] = value + for key, value in kwds.items(): + self[key] = value + + __update = update # let subclasses override update without breaking __init__ + + __marker = object() + + def pop(self, key, default=__marker): + '''od.pop(k[,d]) -> v, remove specified key and return the corresponding value. + If key is not found, d is returned if given, otherwise KeyError is raised. + + ''' + if key in self: + result = self[key] + del self[key] + return result + if default is self.__marker: + raise KeyError(key) + return default + + def setdefault(self, key, default=None): + 'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od' + if key in self: + return self[key] + self[key] = default + return default + + def __repr__(self, _repr_running={}): + 'od.__repr__() <==> repr(od)' + call_key = id(self), _get_ident() + if call_key in _repr_running: + return '...' 
+ _repr_running[call_key] = 1 + try: + if not self: + return '%s()' % (self.__class__.__name__,) + return '%s(%r)' % (self.__class__.__name__, self.items()) + finally: + del _repr_running[call_key] + + def __reduce__(self): + 'Return state information for pickling' + items = [[k, self[k]] for k in self] + inst_dict = vars(self).copy() + for k in vars(OrderedDict()): + inst_dict.pop(k, None) + if inst_dict: + return (self.__class__, (items,), inst_dict) + return self.__class__, (items,) + + def copy(self): + 'od.copy() -> a shallow copy of od' + return self.__class__(self) + + @classmethod + def fromkeys(cls, iterable, value=None): + '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S + and values equal to v (which defaults to None). + + ''' + d = cls() + for key in iterable: + d[key] = value + return d + + def __eq__(self, other): + '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive + while comparison to a regular mapping is order-insensitive. + + ''' + if isinstance(other, OrderedDict): + return len(self)==len(other) and self.items() == other.items() + return dict.__eq__(self, other) + + def __ne__(self, other): + return not self == other + + # -- the following methods are only used in Python 2.7 -- + + def viewkeys(self): + "od.viewkeys() -> a set-like object providing a view on od's keys" + return KeysView(self) + + def viewvalues(self): + "od.viewvalues() -> an object providing a view on od's values" + return ValuesView(self) + + def viewitems(self): + "od.viewitems() -> a set-like object providing a view on od's items" + return ItemsView(self) diff --git a/urllib3/poolmanager.py b/urllib3/poolmanager.py index 310ea21..8f5b54c 100644 --- a/urllib3/poolmanager.py +++ b/urllib3/poolmanager.py @@ -8,9 +8,9 @@ import logging from ._collections import RecentlyUsedContainer from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool -from .connectionpool import get_host, connection_from_url, port_by_scheme -from .exceptions 
import HostChangedError +from .connectionpool import connection_from_url, port_by_scheme from .request import RequestMethods +from .util import parse_url __all__ = ['PoolManager', 'ProxyManager', 'proxy_from_url'] @@ -48,19 +48,29 @@ class PoolManager(RequestMethods): """ - # TODO: Make sure there are no memory leaks here. - def __init__(self, num_pools=10, **connection_pool_kw): self.connection_pool_kw = connection_pool_kw - self.pools = RecentlyUsedContainer(num_pools) + self.pools = RecentlyUsedContainer(num_pools, + dispose_func=lambda p: p.close()) + + def clear(self): + """ + Empty our store of pools and direct them all to close. + + This will not affect in-flight connections, but they will not be + re-used after completion. + """ + self.pools.clear() - def connection_from_host(self, host, port=80, scheme='http'): + def connection_from_host(self, host, port=None, scheme='http'): """ Get a :class:`ConnectionPool` based on the host, port, and scheme. - Note that an appropriate ``port`` value is required here to normalize - connection pools in our container most effectively. + If ``port`` isn't given, it will be derived from the ``scheme`` using + ``urllib3.connectionpool.port_by_scheme``. """ + port = port or port_by_scheme.get(scheme, 80) + pool_key = (scheme, host, port) # If the scheme, host, or port doesn't match existing open connections, @@ -86,26 +96,36 @@ class PoolManager(RequestMethods): Additional parameters are taken from the :class:`.PoolManager` constructor. """ - scheme, host, port = get_host(url) - - port = port or port_by_scheme.get(scheme, 80) - - return self.connection_from_host(host, port=port, scheme=scheme) + u = parse_url(url) + return self.connection_from_host(u.host, port=u.port, scheme=u.scheme) - def urlopen(self, method, url, **kw): + def urlopen(self, method, url, redirect=True, **kw): """ - Same as :meth:`urllib3.connectionpool.HTTPConnectionPool.urlopen`. 
+ Same as :meth:`urllib3.connectionpool.HTTPConnectionPool.urlopen` + with custom cross-host redirect logic and only sends the request-uri + portion of the ``url``. - ``url`` must be absolute, such that an appropriate + The given ``url`` parameter must be absolute, such that an appropriate :class:`urllib3.connectionpool.ConnectionPool` can be chosen for it. """ - conn = self.connection_from_url(url) - try: - return conn.urlopen(method, url, **kw) + u = parse_url(url) + conn = self.connection_from_host(u.host, port=u.port, scheme=u.scheme) + + kw['assert_same_host'] = False + kw['redirect'] = False + + response = conn.urlopen(method, u.request_uri, **kw) + + redirect_location = redirect and response.get_redirect_location() + if not redirect_location: + return response + + if response.status == 303: + method = 'GET' - except HostChangedError as e: - kw['retries'] = e.retries # Persist retries countdown - return self.urlopen(method, e.url, **kw) + log.info("Redirecting %s -> %s" % (url, redirect_location)) + kw['retries'] = kw.get('retries', 3) - 1 # Persist retries countdown + return self.urlopen(method, redirect_location, **kw) class ProxyManager(RequestMethods): diff --git a/urllib3/response.py b/urllib3/response.py index 5fab824..28537d3 100644 --- a/urllib3/response.py +++ b/urllib3/response.py @@ -10,7 +10,7 @@ import zlib from io import BytesIO -from .exceptions import HTTPError +from .exceptions import DecodeError from .packages.six import string_types as basestring @@ -148,9 +148,9 @@ class HTTPResponse(object): try: if decode_content and decoder: data = decoder(data) - except IOError: - raise HTTPError("Received response with content-encoding: %s, but " - "failed to decode it." % content_encoding) + except (IOError, zlib.error): + raise DecodeError("Received response with content-encoding: %s, but " + "failed to decode it." 
% content_encoding) if cache_content: self._body = data diff --git a/urllib3/util.py b/urllib3/util.py index 2684a2f..8ec990b 100644 --- a/urllib3/util.py +++ b/urllib3/util.py @@ -6,6 +6,8 @@ from base64 import b64encode +from collections import namedtuple +from socket import error as SocketError try: from select import poll, POLLIN @@ -20,6 +22,152 @@ from .packages import six from .exceptions import LocationParseError +class Url(namedtuple('Url', ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment'])): + """ + Datastructure for representing an HTTP URL. Used as a return value for + :func:`parse_url`. + """ + slots = () + + def __new__(cls, scheme=None, auth=None, host=None, port=None, path=None, query=None, fragment=None): + return super(Url, cls).__new__(cls, scheme, auth, host, port, path, query, fragment) + + @property + def hostname(self): + """For backwards-compatibility with urlparse. We're nice like that.""" + return self.host + + @property + def request_uri(self): + """Absolute path including the query string.""" + uri = self.path or '/' + + if self.query is not None: + uri += '?' + self.query + + return uri + + +def split_first(s, delims): + """ + Given a string and an iterable of delimiters, split on the first found + delimiter. Return two split parts and the matched delimiter. + + If not found, then the first part is the full input string. + + Example: :: + + >>> split_first('foo/bar?baz', '?/=') + ('foo', 'bar?baz', '/') + >>> split_first('foo/bar?baz', '123') + ('foo/bar?baz', '', None) + + Scales linearly with number of delims. Not ideal for large number of delims. + """ + min_idx = None + min_delim = None + for d in delims: + idx = s.find(d) + if idx < 0: + continue + + if min_idx is None or idx < min_idx: + min_idx = idx + min_delim = d + + if min_idx is None or min_idx < 0: + return s, '', None + + return s[:min_idx], s[min_idx+1:], min_delim + + +def parse_url(url): + """ + Given a url, return a parsed :class:`.Url` namedtuple. 
Best-effort is + performed to parse incomplete urls. Fields not provided will be None. + + Partly backwards-compatible with :mod:`urlparse`. + + Example: :: + + >>> parse_url('http://google.com/mail/') + Url(scheme='http', host='google.com', port=None, path='/mail/', ...) + >>> parse_url('google.com:80') + Url(scheme=None, host='google.com', port=80, path=None, ...) + >>> parse_url('/foo?bar') + Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...) + """ + + # While this code has overlap with stdlib's urlparse, it is much + # simplified for our needs and less annoying. + # Additionally, this implementation does silly things to be optimal + # on CPython. + + scheme = None + auth = None + host = None + port = None + path = None + fragment = None + query = None + + # Scheme + if '://' in url: + scheme, url = url.split('://', 1) + + # Find the earliest Authority Terminator + # (http://tools.ietf.org/html/rfc3986#section-3.2) + url, path_, delim = split_first(url, ['/', '?', '#']) + + if delim: + # Reassemble the path + path = delim + path_ + + # Auth + if '@' in url: + auth, url = url.split('@', 1) + + # IPv6 + if url and url[0] == '[': + host, url = url[1:].split(']', 1) + + # Port + if ':' in url: + _host, port = url.split(':', 1) + + if not host: + host = _host + + if not port.isdigit(): + raise LocationParseError("Failed to parse: %s" % url) + + port = int(port) + + elif not host and url: + host = url + + if not path: + return Url(scheme, auth, host, port, path, query, fragment) + + # Fragment + if '#' in path: + path, fragment = path.split('#', 1) + + # Query + if '?' in path: + path, query = path.split('?', 1) + + return Url(scheme, auth, host, port, path, query, fragment) + + +def get_host(url): + """ + Deprecated. Use :func:`.parse_url` instead.
+ """ + p = parse_url(url) + return p.scheme or 'http', p.hostname, p.port + + def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, basic_auth=None): """ @@ -72,60 +220,28 @@ def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, return headers -def get_host(url): - """ - Given a url, return its scheme, host and port (None if it's not there). - - For example: :: - - >>> get_host('http://google.com/mail/') - ('http', 'google.com', None) - >>> get_host('google.com:80') - ('http', 'google.com', 80) - """ - - # This code is actually similar to urlparse.urlsplit, but much - # simplified for our needs. - port = None - scheme = 'http' - - if '://' in url: - scheme, url = url.split('://', 1) - if '/' in url: - url, _path = url.split('/', 1) - if '@' in url: - _auth, url = url.split('@', 1) - if ':' in url: - url, port = url.split(':', 1) - - if not port.isdigit(): - raise LocationParseError("Failed to parse: %s" % url) - - port = int(port) - - return scheme, url, port - - - def is_connection_dropped(conn): """ Returns True if the connection is dropped and should be closed. :param conn: - ``HTTPConnection`` object. + :class:`httplib.HTTPConnection` object. Note: For platforms like AppEngine, this will always return ``False`` to let the platform handle connection recycling transparently for us. """ sock = getattr(conn, 'sock', False) - if not sock: #Platform-specific: AppEngine + if not sock: # Platform-specific: AppEngine return False if not poll: # Platform-specific - if not select: #Platform-specific: AppEngine + if not select: # Platform-specific: AppEngine return False - return select([sock], [], [], 0.0)[0] + try: + return select([sock], [], [], 0.0)[0] + except SocketError: + return True # This version is better on platforms that support it. 
p = poll() -- cgit v1.2.3 From 92b84b67f7b187b81dacbf1ae46d59a1d0b5b125 Mon Sep 17 00:00:00 2001 From: SVN-Git Migration Date: Thu, 8 Oct 2015 13:19:33 -0700 Subject: Imported Upstream version 1.6 --- CHANGES.rst | 60 ++++++++++++++++ CONTRIBUTORS.txt | 15 ++++ LICENSE.txt | 2 +- PKG-INFO | 74 ++++++++++++++++++- README.rst | 10 ++- dummyserver/handlers.py | 10 ++- dummyserver/server.py | 1 + dummyserver/testcase.py | 6 ++ setup.cfg | 1 + setup.py | 5 +- test-requirements.txt | 3 +- test/test_collections.py | 4 +- test/test_connectionpool.py | 45 ++++++++---- test/test_exceptions.py | 19 +++++ test/test_filepost.py | 42 +++++++++-- test/test_poolmanager.py | 6 +- test/test_proxymanager.py | 27 +++++++ test/test_response.py | 48 +++++++++++++ urllib3.egg-info/PKG-INFO | 74 ++++++++++++++++++- urllib3.egg-info/SOURCES.txt | 5 ++ urllib3/__init__.py | 4 +- urllib3/_collections.py | 2 +- urllib3/connectionpool.py | 99 ++++++++++++++++--------- urllib3/contrib/__init__.py | 0 urllib3/contrib/ntlmpool.py | 120 +++++++++++++++++++++++++++++++ urllib3/contrib/pyopenssl.py | 167 +++++++++++++++++++++++++++++++++++++++++++ urllib3/exceptions.py | 18 +++-- urllib3/filepost.py | 27 ++++--- urllib3/packages/six.py | 53 ++++++++------ urllib3/poolmanager.py | 74 +++++++++++++------ urllib3/request.py | 28 ++++++-- urllib3/response.py | 95 ++++++++++++++++-------- urllib3/util.py | 142 +++++++++++++++++++++++++++++++++--- 33 files changed, 1116 insertions(+), 170 deletions(-) create mode 100644 test/test_exceptions.py create mode 100644 test/test_proxymanager.py create mode 100644 urllib3/contrib/__init__.py create mode 100644 urllib3/contrib/ntlmpool.py create mode 100644 urllib3/contrib/pyopenssl.py diff --git a/CHANGES.rst b/CHANGES.rst index a0cbdb3..a2a0da8 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,66 @@ Changes ======= +1.6 (2013-04-25) +++++++++++++++++ + +* Contrib: Optional SNI support for Py2 using PyOpenSSL. 
(Issue #156) + +* ``ProxyManager`` automatically adds ``Host: ...`` header if not given. + +* Improved SSL-related code. ``cert_reqs`` now optionally takes a string like + "REQUIRED" or "NONE". Likewise, ``ssl_version`` takes strings like "SSLv23". + The string values reflect the suffix of the respective constant variable. + (Issue #130) + +* Vendored ``socksipy`` now based on Anorov's fork which handles unexpectedly + closed proxy connections and larger read buffers. (Issue #135) + +* Ensure the connection is closed if no data is received, fixes connection leak + on some platforms. (Issue #133) + +* Added SNI support for SSL/TLS connections on Py32+. (Issue #89) + +* Tests fixed to be compatible with Py26 again. (Issue #125) + +* Added ability to choose SSL version by passing an ``ssl.PROTOCOL_*`` constant + to the ``ssl_version`` parameter of ``HTTPSConnectionPool``. (Issue #109) + +* Allow an explicit content type to be specified when encoding file fields. + (Issue #126) + +* Exceptions are now pickleable, with tests. (Issue #101) + +* Fixed default headers not getting passed in some cases. (Issue #99) + +* Treat "content-encoding" header value as case-insensitive, per RFC 2616 + Section 3.5. (Issue #110) + +* "Connection Refused" SocketErrors will get retried rather than raised. + (Issue #92) + +* Updated vendored ``six``, no longer overrides the global ``six`` module + namespace. (Issue #113) + +* ``urllib3.exceptions.MaxRetryError`` contains a ``reason`` property holding + the exception that prompted the final retry. If ``reason is None`` then it + was due to a redirect. (Issue #92, #114) + +* Fixed ``PoolManager.urlopen()`` from not redirecting more than once. + (Issue #149) + +* Don't assume ``Content-Type: text/plain`` for multi-part encoding parameters + that are not files. (Issue #111) + +* Pass `strict` param down to ``httplib.HTTPConnection``.
(Issue #122) + +* Added mechanism to verify SSL certificates by fingerprint (md5, sha1) or + against an arbitrary hostname (when connecting by IP or for misconfigured + servers). (Issue #140) + +* Streaming decompression support. (Issue #159) + + 1.5 (2012-08-02) ++++++++++++++++ diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt index 7dfbcaf..19f59ce 100644 --- a/CONTRIBUTORS.txt +++ b/CONTRIBUTORS.txt @@ -45,5 +45,20 @@ In chronological order: * Shivaram Lingamneni * Support for explicitly closing pooled connections +* hartator + * Corrected multipart behavior for params + +* Thomas Weißschuh + * Support for TLS SNI + * API unification of ssl_version/cert_reqs + * SSL fingerprint and alternative hostname verification + * Bugfixes in testsuite + +* Sune Kirkeby + * Optional SNI-support for Python 2 via PyOpenSSL. + +* Marc Schlaich + * Various bugfixes and test improvements. + * [Your name or handle] <[email or website]> * [Brief summary of your changes] diff --git a/LICENSE.txt b/LICENSE.txt index f658ad6..31f0b6c 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,6 +1,6 @@ This is the MIT license: http://www.opensource.org/licenses/mit-license.php -Copyright 2008-2011 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software diff --git a/PKG-INFO b/PKG-INFO index 4e79ea8..661e33b 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,12 +1,20 @@ Metadata-Version: 1.1 Name: urllib3 -Version: 1.5 +Version: 1.6 Summary: HTTP library with thread-safe connection pooling, file post, and more. Home-page: http://urllib3.readthedocs.org/ Author: Andrey Petrov Author-email: andrey.petrov@shazow.net License: MIT -Description: Highlights +Description: ======= + urllib3 + ======= + + .. 
image:: https://travis-ci.org/shazow/urllib3.png?branch=master + :target: https://travis-ci.org/shazow/urllib3 + + + Highlights ========== - Re-use the same socket connection for multiple requests @@ -17,7 +25,7 @@ Description: Highlights - Supports gzip and deflate decoding. - Thread-safe and sanity-safe. - Works with AppEngine, gevent, and eventlib. - - Tested on Python 2.6+ and Python 3.2+, 100% unit test coverage. + - Tested on Python 2.6+ and Python 3.3+, 100% unit test coverage. - Small and easy to understand codebase perfect for extending and building upon. For a more comprehensive solution, have a look at `Requests `_ which is also powered by urllib3. @@ -109,6 +117,66 @@ Description: Highlights Changes ======= + 1.6 (2013-04-25) + ++++++++++++++++ + + * Contrib: Optional SNI support for Py2 using PyOpenSSL. (Issue #156) + + * ``ProxyManager`` automatically adds ``Host: ...`` header if not given. + + * Improved SSL-related code. ``cert_req`` now optionally takes a string like + "REQUIRED" or "NONE". Same with ``ssl_version`` takes strings like "SSLv23" + The string values reflect the suffix of the respective constant variable. + (Issue #130) + + * Vendored ``socksipy`` now based on Anorov's fork which handles unexpectedly + closed proxy connections and larger read buffers. (Issue #135) + + * Ensure the connection is closed if no data is received, fixes connection leak + on some platforms. (Issue #133) + + * Added SNI support for SSL/TLS connections on Py32+. (Issue #89) + + * Tests fixed to be compatible with Py26 again. (Issue #125) + + * Added ability to choose SSL version by passing an ``ssl.PROTOCOL_*`` constant + to the ``ssl_version`` parameter of ``HTTPSConnectionPool``. (Issue #109) + + * Allow an explicit content type to be specified when encoding file fields. + (Issue #126) + + * Exceptions are now pickleable, with tests. (Issue #101) + + * Fixed default headers not getting passed in some cases. 
(Issue #99) + + * Treat "content-encoding" header value as case-insensitive, per RFC 2616 + Section 3.5. (Issue #110) + + * "Connection Refused" SocketErrors will get retried rather than raised. + (Issue #92) + + * Updated vendored ``six``, no longer overrides the global ``six`` module + namespace. (Issue #113) + + * ``urllib3.exceptions.MaxRetryError`` contains a ``reason`` property holding + the exception that prompted the final retry. If ``reason is None`` then it + was due to a redirect. (Issue #92, #114) + + * Fixed ``PoolManager.urlopen()`` from not redirecting more than once. + (Issue #149) + + * Don't assume ``Content-Type: text/plain`` for multi-part encoding parameters + that are not files. (Issue #111) + + * Pass `strict` param down to ``httplib.HTTPConnection``. (Issue #122) + + * Added mechanism to verify SSL certificates by fingerprint (md5, sha1) or + against an arbitrary hostname (when connecting by IP or for misconfigured + servers). (Issue #140) + + * Streaming decompression support. (Issue #159) + + 1.5 (2012-08-02) ++++++++++++++++ diff --git a/README.rst b/README.rst index 144df0e..75f05d8 100644 --- a/README.rst +++ b/README.rst @@ -1,3 +1,11 @@ +======= +urllib3 +======= + +.. image:: https://travis-ci.org/shazow/urllib3.png?branch=master + :target: https://travis-ci.org/shazow/urllib3 + + Highlights ========== @@ -9,7 +17,7 @@ Highlights - Supports gzip and deflate decoding. - Thread-safe and sanity-safe. - Works with AppEngine, gevent, and eventlib. -- Tested on Python 2.6+ and Python 3.2+, 100% unit test coverage. +- Tested on Python 2.6+ and Python 3.3+, 100% unit test coverage. - Small and easy to understand codebase perfect for extending and building upon. For a more comprehensive solution, have a look at `Requests `_ which is also powered by urllib3. 
diff --git a/dummyserver/handlers.py b/dummyserver/handlers.py index ca809ad..ab48b53 100644 --- a/dummyserver/handlers.py +++ b/dummyserver/handlers.py @@ -1,6 +1,7 @@ from __future__ import print_function import gzip +import json import logging import sys import time @@ -120,7 +121,7 @@ class TestingApp(WSGIHandler): return Response(status='303', headers=headers) def keepalive(self, request): - if request.params.get('close', '0') == '1': + if request.params.get('close', b'0') == b'1': headers = [('Connection', 'close')] return Response('Closing', headers=headers) @@ -148,7 +149,9 @@ class TestingApp(WSGIHandler): if encoding == 'gzip': headers = [('Content-Encoding', 'gzip')] file_ = BytesIO() - gzip.GzipFile('', mode='w', fileobj=file_).write(data) + zipfile = gzip.GzipFile('', mode='w', fileobj=file_) + zipfile.write(data) + zipfile.close() data = file_.getvalue() elif encoding == 'deflate': headers = [('Content-Encoding', 'deflate')] @@ -161,5 +164,8 @@ class TestingApp(WSGIHandler): data = 'garbage' return Response(data, headers=headers) + def headers(self, request): + return Response(json.dumps(request.headers)) + def shutdown(self, request): sys.exit() diff --git a/dummyserver/server.py b/dummyserver/server.py index 6c0943c..9031664 100755 --- a/dummyserver/server.py +++ b/dummyserver/server.py @@ -60,6 +60,7 @@ class SocketServerThread(threading.Thread): self.ready_lock.release() self.socket_handler(sock) + sock.close() def run(self): self.server = self._start_server() diff --git a/dummyserver/testcase.py b/dummyserver/testcase.py index 518d739..73b8f2f 100644 --- a/dummyserver/testcase.py +++ b/dummyserver/testcase.py @@ -32,6 +32,11 @@ class SocketDummyServerTestCase(unittest.TestCase): # Lock gets released by thread above ready_lock.acquire() + @classmethod + def tearDownClass(cls): + if hasattr(cls, 'server_thread'): + cls.server_thread.join() + class HTTPDummyServerTestCase(unittest.TestCase): scheme = 'http' @@ -54,6 +59,7 @@ class 
HTTPDummyServerTestCase(unittest.TestCase): @classmethod def _stop_server(cls): cls.server_thread.stop() + cls.server_thread.join() @classmethod def setUpClass(cls): diff --git a/setup.cfg b/setup.cfg index 58ce3f5..8f6983c 100644 --- a/setup.cfg +++ b/setup.cfg @@ -2,6 +2,7 @@ logging-clear-handlers = true with-coverage = true cover-package = urllib3 +cover-min-percentage = 100 [egg_info] tag_build = diff --git a/setup.py b/setup.py index 84d6e7f..392b885 100644 --- a/setup.py +++ b/setup.py @@ -44,8 +44,9 @@ setup(name='urllib3', author_email='andrey.petrov@shazow.net', url='http://urllib3.readthedocs.org/', license='MIT', - packages=['urllib3', 'dummyserver', 'urllib3.packages', - 'urllib3.packages.ssl_match_hostname', + packages=['urllib3', 'dummyserver', + 'urllib3.packages', 'urllib3.packages.ssl_match_hostname', + 'urllib3.contrib', ], requires=requirements, tests_require=tests_requirements, diff --git a/test-requirements.txt b/test-requirements.txt index e2d1579..226c13d 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -1,2 +1,3 @@ nose -tornado==2.1.1 +tornado==2.4.1 +coverage diff --git a/test/test_collections.py b/test/test_collections.py index 098b31a..b44c58a 100644 --- a/test/test_collections.py +++ b/test/test_collections.py @@ -119,9 +119,7 @@ class TestLRUContainer(unittest.TestCase): def test_iter(self): d = Container() - with self.assertRaises(NotImplementedError): - for i in d: - self.fail("Iteration shouldn't be implemented.") + self.assertRaises(NotImplementedError, d.__iter__) if __name__ == '__main__': unittest.main() diff --git a/test/test_connectionpool.py b/test/test_connectionpool.py index afc3098..a7e104a 100644 --- a/test/test_connectionpool.py +++ b/test/test_connectionpool.py @@ -11,7 +11,7 @@ from urllib3.exceptions import ( TimeoutError, ) -from socket import timeout as SocketTimeout +from socket import error as SocketError, timeout as SocketTimeout from ssl import SSLError as BaseSSLError try: # Python 3 @@ -23,6 
+23,10 @@ except ImportError: class TestConnectionPool(unittest.TestCase): + """ + Tests in this suite should exercise the ConnectionPool functionality + without actually making any network requests or connections. + """ def test_same_host(self): same_host = [ ('http://google.com/', '/'), @@ -86,6 +90,24 @@ class TestConnectionPool(unittest.TestCase): str(EmptyPoolError(HTTPConnectionPool(host='localhost'), "Test.")), "HTTPConnectionPool(host='localhost', port=None): Test.") + def test_retry_exception_str(self): + self.assertEqual( + str(MaxRetryError( + HTTPConnectionPool(host='localhost'), "Test.", None)), + "HTTPConnectionPool(host='localhost', port=None): " + "Max retries exceeded with url: Test. (Caused by redirect)") + + err = SocketError("Test") + + # using err.__class__ here, as socket.error is an alias for OSError + # since Py3.3 and gets printed as this + self.assertEqual( + str(MaxRetryError( + HTTPConnectionPool(host='localhost'), "Test.", err)), + "HTTPConnectionPool(host='localhost', port=None): " + "Max retries exceeded with url: Test. 
" + "(Caused by {0}: Test)".format(str(err.__class__))) + def test_pool_size(self): POOL_SIZE = 1 pool = HTTPConnectionPool(host='localhost', maxsize=POOL_SIZE, block=True) @@ -95,8 +117,7 @@ class TestConnectionPool(unittest.TestCase): def _test(exception, expect): pool._make_request = lambda *args, **kwargs: _raise(exception) - with self.assertRaises(expect): - pool.request('GET', '/') + self.assertRaises(expect, pool.request, 'GET', '/') self.assertEqual(pool.pool.qsize(), POOL_SIZE) @@ -111,15 +132,15 @@ class TestConnectionPool(unittest.TestCase): # MaxRetryError, not EmptyPoolError # See: https://github.com/shazow/urllib3/issues/76 pool._make_request = lambda *args, **kwargs: _raise(HTTPException) - with self.assertRaises(MaxRetryError): - pool.request('GET', '/', retries=1, pool_timeout=0.01) + self.assertRaises(MaxRetryError, pool.request, + 'GET', '/', retries=1, pool_timeout=0.01) self.assertEqual(pool.pool.qsize(), POOL_SIZE) def test_assert_same_host(self): c = connection_from_url('http://google.com:80') - with self.assertRaises(HostChangedError): - c.request('GET', 'http://yahoo.com:80', assert_same_host=True) + self.assertRaises(HostChangedError, c.request, + 'GET', 'http://yahoo.com:80', assert_same_host=True) def test_pool_close(self): pool = connection_from_url('http://google.com:80') @@ -136,16 +157,14 @@ class TestConnectionPool(unittest.TestCase): pool.close() self.assertEqual(pool.pool, None) - with self.assertRaises(ClosedPoolError): - pool._get_conn() + self.assertRaises(ClosedPoolError, pool._get_conn) pool._put_conn(conn3) - with self.assertRaises(ClosedPoolError): - pool._get_conn() + self.assertRaises(ClosedPoolError, pool._get_conn) + + self.assertRaises(Empty, old_pool_queue.get, block=False) - with self.assertRaises(Empty): - old_pool_queue.get(block=False) if __name__ == '__main__': diff --git a/test/test_exceptions.py b/test/test_exceptions.py new file mode 100644 index 0000000..3e02ca6 --- /dev/null +++ b/test/test_exceptions.py @@ 
-0,0 +1,19 @@ +import unittest +import pickle + +from urllib3.exceptions import HTTPError, MaxRetryError, LocationParseError +from urllib3.connectionpool import HTTPConnectionPool + + + +class TestPickle(unittest.TestCase): + + def test_exceptions(self): + assert pickle.dumps(HTTPError(None)) + assert pickle.dumps(MaxRetryError(None, None, None)) + assert pickle.dumps(LocationParseError(None)) + + def test_exceptions_with_objects(self): + assert pickle.dumps(HTTPError('foo')) + assert pickle.dumps(MaxRetryError(HTTPConnectionPool('localhost'), '/', None)) + assert pickle.dumps(LocationParseError('fake location')) diff --git a/test/test_filepost.py b/test/test_filepost.py index c251778..70ab100 100644 --- a/test/test_filepost.py +++ b/test/test_filepost.py @@ -52,19 +52,17 @@ class TestMultipartEncoding(unittest.TestCase): self.assertEqual(encoded, b'--' + b(BOUNDARY) + b'\r\n' b'Content-Disposition: form-data; name="k"\r\n' - b'Content-Type: text/plain\r\n' b'\r\n' b'v\r\n' b'--' + b(BOUNDARY) + b'\r\n' b'Content-Disposition: form-data; name="k2"\r\n' - b'Content-Type: text/plain\r\n' b'\r\n' b'v2\r\n' b'--' + b(BOUNDARY) + b'--\r\n' , fields) self.assertEqual(content_type, - b'multipart/form-data; boundary=' + b(BOUNDARY)) + 'multipart/form-data; boundary=' + str(BOUNDARY)) def test_filename(self): @@ -82,4 +80,40 @@ class TestMultipartEncoding(unittest.TestCase): ) self.assertEqual(content_type, - b'multipart/form-data; boundary=' + b(BOUNDARY)) + 'multipart/form-data; boundary=' + str(BOUNDARY)) + + + def test_textplain(self): + fields = [('k', ('somefile.txt', b'v'))] + + encoded, content_type = encode_multipart_formdata(fields, boundary=BOUNDARY) + + self.assertEqual(encoded, + b'--' + b(BOUNDARY) + b'\r\n' + b'Content-Disposition: form-data; name="k"; filename="somefile.txt"\r\n' + b'Content-Type: text/plain\r\n' + b'\r\n' + b'v\r\n' + b'--' + b(BOUNDARY) + b'--\r\n' + ) + + self.assertEqual(content_type, + 'multipart/form-data; boundary=' + str(BOUNDARY)) + 
+ + def test_explicit(self): + fields = [('k', ('somefile.txt', b'v', 'image/jpeg'))] + + encoded, content_type = encode_multipart_formdata(fields, boundary=BOUNDARY) + + self.assertEqual(encoded, + b'--' + b(BOUNDARY) + b'\r\n' + b'Content-Disposition: form-data; name="k"; filename="somefile.txt"\r\n' + b'Content-Type: image/jpeg\r\n' + b'\r\n' + b'v\r\n' + b'--' + b(BOUNDARY) + b'--\r\n' + ) + + self.assertEqual(content_type, + 'multipart/form-data; boundary=' + str(BOUNDARY)) diff --git a/test/test_poolmanager.py b/test/test_poolmanager.py index 273abf9..2faab94 100644 --- a/test/test_poolmanager.py +++ b/test/test_poolmanager.py @@ -54,13 +54,11 @@ class TestPoolManager(unittest.TestCase): p.clear() self.assertEqual(len(p.pools), 0) - with self.assertRaises(ClosedPoolError): - conn_pool._get_conn() + self.assertRaises(ClosedPoolError, conn_pool._get_conn) conn_pool._put_conn(conn) - with self.assertRaises(ClosedPoolError): - conn_pool._get_conn() + self.assertRaises(ClosedPoolError, conn_pool._get_conn) self.assertEqual(len(p.pools), 0) diff --git a/test/test_proxymanager.py b/test/test_proxymanager.py new file mode 100644 index 0000000..64c86e8 --- /dev/null +++ b/test/test_proxymanager.py @@ -0,0 +1,27 @@ +import unittest + +from urllib3.poolmanager import ProxyManager + + +class TestProxyManager(unittest.TestCase): + def test_proxy_headers(self): + p = ProxyManager(None) + url = 'http://pypi.python.org/test' + + # Verify default headers + default_headers = {'Accept': '*/*', + 'Host': 'pypi.python.org'} + headers = p._set_proxy_headers(url) + + self.assertEqual(headers, default_headers) + + # Verify default headers don't overwrite provided headers + provided_headers = {'Accept': 'application/json', + 'custom': 'header', + 'Host': 'test.python.org'} + headers = p._set_proxy_headers(url, provided_headers) + + self.assertEqual(headers, provided_headers) + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_response.py b/test/test_response.py 
index 964f677..199e379 100644 --- a/test/test_response.py +++ b/test/test_response.py @@ -63,6 +63,54 @@ class TestResponse(unittest.TestCase): self.assertEqual(r.data, b'foo') + def test_decode_deflate_case_insensitve(self): + import zlib + data = zlib.compress(b'foo') + + fp = BytesIO(data) + r = HTTPResponse(fp, headers={'content-encoding': 'DeFlAtE'}) + + self.assertEqual(r.data, b'foo') + + def test_chunked_decoding_deflate(self): + import zlib + data = zlib.compress(b'foo') + + fp = BytesIO(data) + r = HTTPResponse(fp, headers={'content-encoding': 'deflate'}, + preload_content=False) + + self.assertEqual(r.read(3), b'') + self.assertEqual(r.read(1), b'f') + self.assertEqual(r.read(2), b'oo') + + def test_chunked_decoding_deflate2(self): + import zlib + compress = zlib.compressobj(6, zlib.DEFLATED, -zlib.MAX_WBITS) + data = compress.compress(b'foo') + data += compress.flush() + + fp = BytesIO(data) + r = HTTPResponse(fp, headers={'content-encoding': 'deflate'}, + preload_content=False) + + self.assertEqual(r.read(1), b'') + self.assertEqual(r.read(1), b'f') + self.assertEqual(r.read(2), b'oo') + + def test_chunked_decoding_gzip(self): + import zlib + compress = zlib.compressobj(6, zlib.DEFLATED, 16 + zlib.MAX_WBITS) + data = compress.compress(b'foo') + data += compress.flush() + + fp = BytesIO(data) + r = HTTPResponse(fp, headers={'content-encoding': 'gzip'}, + preload_content=False) + + self.assertEqual(r.read(11), b'') + self.assertEqual(r.read(1), b'f') + self.assertEqual(r.read(2), b'oo') if __name__ == '__main__': unittest.main() diff --git a/urllib3.egg-info/PKG-INFO b/urllib3.egg-info/PKG-INFO index 4e79ea8..661e33b 100644 --- a/urllib3.egg-info/PKG-INFO +++ b/urllib3.egg-info/PKG-INFO @@ -1,12 +1,20 @@ Metadata-Version: 1.1 Name: urllib3 -Version: 1.5 +Version: 1.6 Summary: HTTP library with thread-safe connection pooling, file post, and more. 
Home-page: http://urllib3.readthedocs.org/ Author: Andrey Petrov Author-email: andrey.petrov@shazow.net License: MIT -Description: Highlights +Description: ======= + urllib3 + ======= + + .. image:: https://travis-ci.org/shazow/urllib3.png?branch=master + :target: https://travis-ci.org/shazow/urllib3 + + + Highlights ========== - Re-use the same socket connection for multiple requests @@ -17,7 +25,7 @@ Description: Highlights - Supports gzip and deflate decoding. - Thread-safe and sanity-safe. - Works with AppEngine, gevent, and eventlib. - - Tested on Python 2.6+ and Python 3.2+, 100% unit test coverage. + - Tested on Python 2.6+ and Python 3.3+, 100% unit test coverage. - Small and easy to understand codebase perfect for extending and building upon. For a more comprehensive solution, have a look at `Requests `_ which is also powered by urllib3. @@ -109,6 +117,66 @@ Description: Highlights Changes ======= + 1.6 (2013-04-25) + ++++++++++++++++ + + * Contrib: Optional SNI support for Py2 using PyOpenSSL. (Issue #156) + + * ``ProxyManager`` automatically adds ``Host: ...`` header if not given. + + * Improved SSL-related code. ``cert_req`` now optionally takes a string like + "REQUIRED" or "NONE". Same with ``ssl_version`` takes strings like "SSLv23" + The string values reflect the suffix of the respective constant variable. + (Issue #130) + + * Vendored ``socksipy`` now based on Anorov's fork which handles unexpectedly + closed proxy connections and larger read buffers. (Issue #135) + + * Ensure the connection is closed if no data is received, fixes connection leak + on some platforms. (Issue #133) + + * Added SNI support for SSL/TLS connections on Py32+. (Issue #89) + + * Tests fixed to be compatible with Py26 again. (Issue #125) + + * Added ability to choose SSL version by passing an ``ssl.PROTOCOL_*`` constant + to the ``ssl_version`` parameter of ``HTTPSConnectionPool``. (Issue #109) + + * Allow an explicit content type to be specified when encoding file fields. 
+ (Issue #126) + + * Exceptions are now pickleable, with tests. (Issue #101) + + * Fixed default headers not getting passed in some cases. (Issue #99) + + * Treat "content-encoding" header value as case-insensitive, per RFC 2616 + Section 3.5. (Issue #110) + + * "Connection Refused" SocketErrors will get retried rather than raised. + (Issue #92) + + * Updated vendored ``six``, no longer overrides the global ``six`` module + namespace. (Issue #113) + + * ``urllib3.exceptions.MaxRetryError`` contains a ``reason`` property holding + the exception that prompted the final retry. If ``reason is None`` then it + was due to a redirect. (Issue #92, #114) + + * Fixed ``PoolManager.urlopen()`` from not redirecting more than once. + (Issue #149) + + * Don't assume ``Content-Type: text/plain`` for multi-part encoding parameters + that are not files. (Issue #111) + + * Pass `strict` param down to ``httplib.HTTPConnection``. (Issue #122) + + * Added mechanism to verify SSL certificates by fingerprint (md5, sha1) or + against an arbitrary hostname (when connecting by IP or for misconfigured + servers). (Issue #140) + + * Streaming decompression support. 
(Issue #159) + + 1.5 (2012-08-02) ++++++++++++++++ diff --git a/urllib3.egg-info/SOURCES.txt b/urllib3.egg-info/SOURCES.txt index 3155626..69ec475 100644 --- a/urllib3.egg-info/SOURCES.txt +++ b/urllib3.egg-info/SOURCES.txt @@ -12,8 +12,10 @@ dummyserver/server.py dummyserver/testcase.py test/test_collections.py test/test_connectionpool.py +test/test_exceptions.py test/test_filepost.py test/test_poolmanager.py +test/test_proxymanager.py test/test_response.py test/test_util.py urllib3/__init__.py @@ -29,6 +31,9 @@ urllib3.egg-info/PKG-INFO urllib3.egg-info/SOURCES.txt urllib3.egg-info/dependency_links.txt urllib3.egg-info/top_level.txt +urllib3/contrib/__init__.py +urllib3/contrib/ntlmpool.py +urllib3/contrib/pyopenssl.py urllib3/packages/__init__.py urllib3/packages/ordered_dict.py urllib3/packages/six.py diff --git a/urllib3/__init__.py b/urllib3/__init__.py index b552543..ebd43b3 100644 --- a/urllib3/__init__.py +++ b/urllib3/__init__.py @@ -1,5 +1,5 @@ # urllib3/__init__.py -# Copyright 2008-2012 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) # # This module is part of urllib3 and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php @@ -10,7 +10,7 @@ urllib3 - Thread-safe connection pooling and re-using. 
__author__ = 'Andrey Petrov (andrey.petrov@shazow.net)' __license__ = 'MIT' -__version__ = '1.5' +__version__ = '1.6' from .connectionpool import ( diff --git a/urllib3/_collections.py b/urllib3/_collections.py index a052b1d..b35a736 100644 --- a/urllib3/_collections.py +++ b/urllib3/_collections.py @@ -1,5 +1,5 @@ # urllib3/_collections.py -# Copyright 2008-2012 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) # # This module is part of urllib3 and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php diff --git a/urllib3/connectionpool.py b/urllib3/connectionpool.py index 97da544..73fa9ca 100644 --- a/urllib3/connectionpool.py +++ b/urllib3/connectionpool.py @@ -1,13 +1,15 @@ # urllib3/connectionpool.py -# Copyright 2008-2012 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) # # This module is part of urllib3 and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php import logging import socket +import errno -from socket import timeout as SocketTimeout +from socket import error as SocketError, timeout as SocketTimeout +from .util import resolve_cert_reqs, resolve_ssl_version, assert_fingerprint try: # Python 3 from http.client import HTTPConnection, HTTPException @@ -41,7 +43,7 @@ except (ImportError, AttributeError): # Platform-specific: No SSL. 
from .request import RequestMethods from .response import HTTPResponse -from .util import get_host, is_connection_dropped +from .util import get_host, is_connection_dropped, ssl_wrap_socket from .exceptions import ( ClosedPoolError, EmptyPoolError, @@ -76,32 +78,41 @@ class VerifiedHTTPSConnection(HTTPSConnection): """ cert_reqs = None ca_certs = None + ssl_version = None def set_cert(self, key_file=None, cert_file=None, - cert_reqs='CERT_NONE', ca_certs=None): - ssl_req_scheme = { - 'CERT_NONE': ssl.CERT_NONE, - 'CERT_OPTIONAL': ssl.CERT_OPTIONAL, - 'CERT_REQUIRED': ssl.CERT_REQUIRED - } + cert_reqs=None, ca_certs=None, + assert_hostname=None, assert_fingerprint=None): self.key_file = key_file self.cert_file = cert_file - self.cert_reqs = ssl_req_scheme.get(cert_reqs) or ssl.CERT_NONE + self.cert_reqs = cert_reqs self.ca_certs = ca_certs + self.assert_hostname = assert_hostname + self.assert_fingerprint = assert_fingerprint def connect(self): # Add certificate verification sock = socket.create_connection((self.host, self.port), self.timeout) + resolved_cert_reqs = resolve_cert_reqs(self.cert_reqs) + resolved_ssl_version = resolve_ssl_version(self.ssl_version) + # Wrap socket using verification with the root certs in # trusted_root_certs - self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, - cert_reqs=self.cert_reqs, - ca_certs=self.ca_certs) - if self.ca_certs: - match_hostname(self.sock.getpeercert(), self.host) - + self.sock = ssl_wrap_socket(sock, self.key_file, self.cert_file, + cert_reqs=resolved_cert_reqs, + ca_certs=self.ca_certs, + server_hostname=self.host, + ssl_version=resolved_ssl_version) + + if resolved_cert_reqs != ssl.CERT_NONE: + if self.assert_fingerprint: + assert_fingerprint(self.sock.getpeercert(binary_form=True), + self.assert_fingerprint) + else: + match_hostname(self.sock.getpeercert(), + self.assert_hostname or self.host) ## Pool objects @@ -166,13 +177,13 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): def 
__init__(self, host, port=None, strict=False, timeout=None, maxsize=1, block=False, headers=None): - super(HTTPConnectionPool, self).__init__(host, port) + ConnectionPool.__init__(self, host, port) + RequestMethods.__init__(self, headers) self.strict = strict self.timeout = timeout self.pool = self.QueueCls(maxsize) self.block = block - self.headers = headers or {} # Fill the queue up so that doing get() on it will block properly for _ in xrange(maxsize): @@ -189,7 +200,9 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): self.num_connections += 1 log.info("Starting new HTTP connection (%d): %s" % (self.num_connections, self.host)) - return HTTPConnection(host=self.host, port=self.port) + return HTTPConnection(host=self.host, + port=self.port, + strict=self.strict) def _get_conn(self, timeout=None): """ @@ -449,12 +462,15 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): # Name mismatch raise SSLError(e) - except HTTPException as e: + except (HTTPException, SocketError) as e: # Connection broken, discard. It will be replaced next _get_conn(). conn = None # This is necessary so we can access e below err = e + if retries == 0: + raise MaxRetryError(self, url, e) + finally: if release_conn: # Put the connection back to be reused. If the connection is @@ -491,11 +507,15 @@ class HTTPSConnectionPool(HTTPConnectionPool): When Python is compiled with the :mod:`ssl` module, then :class:`.VerifiedHTTPSConnection` is used, which *can* verify certificates, - instead of :class:httplib.HTTPSConnection`. + instead of :class:`httplib.HTTPSConnection`. - The ``key_file``, ``cert_file``, ``cert_reqs``, and ``ca_certs`` parameters - are only used if :mod:`ssl` is available and are fed into - :meth:`ssl.wrap_socket` to upgrade the connection socket into an SSL socket. + :class:`.VerifiedHTTPSConnection` uses one of ``assert_fingerprint``, + ``assert_hostname`` and ``host`` in this order to verify connections. 
+ + The ``key_file``, ``cert_file``, ``cert_reqs``, ``ca_certs`` and + ``ssl_version`` are only used if :mod:`ssl` is available and are fed into + :meth:`urllib3.util.ssl_wrap_socket` to upgrade the connection socket + into an SSL socket. """ scheme = 'https' @@ -503,16 +523,20 @@ class HTTPSConnectionPool(HTTPConnectionPool): def __init__(self, host, port=None, strict=False, timeout=None, maxsize=1, block=False, headers=None, - key_file=None, cert_file=None, - cert_reqs='CERT_NONE', ca_certs=None): + key_file=None, cert_file=None, cert_reqs=None, + ca_certs=None, ssl_version=None, + assert_hostname=None, assert_fingerprint=None): - super(HTTPSConnectionPool, self).__init__(host, port, - strict, timeout, maxsize, - block, headers) + HTTPConnectionPool.__init__(self, host, port, + strict, timeout, maxsize, + block, headers) self.key_file = key_file self.cert_file = cert_file self.cert_reqs = cert_reqs self.ca_certs = ca_certs + self.ssl_version = ssl_version + self.assert_hostname = assert_hostname + self.assert_fingerprint = assert_fingerprint def _new_conn(self): """ @@ -522,16 +546,25 @@ class HTTPSConnectionPool(HTTPConnectionPool): log.info("Starting new HTTPS connection (%d): %s" % (self.num_connections, self.host)) - if not ssl: # Platform-specific: Python compiled without +ssl + if not ssl: # Platform-specific: Python compiled without +ssl if not HTTPSConnection or HTTPSConnection is object: raise SSLError("Can't connect to HTTPS URL because the SSL " "module is not available.") - return HTTPSConnection(host=self.host, port=self.port) + return HTTPSConnection(host=self.host, + port=self.port, + strict=self.strict) - connection = VerifiedHTTPSConnection(host=self.host, port=self.port) + connection = VerifiedHTTPSConnection(host=self.host, + port=self.port, + strict=self.strict) connection.set_cert(key_file=self.key_file, cert_file=self.cert_file, - cert_reqs=self.cert_reqs, ca_certs=self.ca_certs) + cert_reqs=self.cert_reqs, ca_certs=self.ca_certs, + 
assert_hostname=self.assert_hostname, + assert_fingerprint=self.assert_fingerprint) + + connection.ssl_version = self.ssl_version + return connection diff --git a/urllib3/contrib/__init__.py b/urllib3/contrib/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/urllib3/contrib/ntlmpool.py b/urllib3/contrib/ntlmpool.py new file mode 100644 index 0000000..277ee0b --- /dev/null +++ b/urllib3/contrib/ntlmpool.py @@ -0,0 +1,120 @@ +# urllib3/contrib/ntlmpool.py +# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# +# This module is part of urllib3 and is released under +# the MIT License: http://www.opensource.org/licenses/mit-license.php + +""" +NTLM authenticating pool, contributed by erikcederstran + +Issue #10, see: http://code.google.com/p/urllib3/issues/detail?id=10 +""" + +try: + from http.client import HTTPSConnection +except ImportError: + from httplib import HTTPSConnection +from logging import getLogger +from ntlm import ntlm + +from urllib3 import HTTPSConnectionPool + + +log = getLogger(__name__) + + +class NTLMConnectionPool(HTTPSConnectionPool): + """ + Implements an NTLM authentication version of an urllib3 connection pool + """ + + scheme = 'https' + + def __init__(self, user, pw, authurl, *args, **kwargs): + """ + authurl is a random URL on the server that is protected by NTLM. + user is the Windows user, probably in the DOMAIN\username format. + pw is the password for the user. + """ + super(NTLMConnectionPool, self).__init__(*args, **kwargs) + self.authurl = authurl + self.rawuser = user + user_parts = user.split('\\', 1) + self.domain = user_parts[0].upper() + self.user = user_parts[1] + self.pw = pw + + def _new_conn(self): + # Performs the NTLM handshake that secures the connection. The socket + # must be kept open while requests are performed. + self.num_connections += 1 + log.debug('Starting NTLM HTTPS connection no. 
%d: https://%s%s' % + (self.num_connections, self.host, self.authurl)) + + headers = {} + headers['Connection'] = 'Keep-Alive' + req_header = 'Authorization' + resp_header = 'www-authenticate' + + conn = HTTPSConnection(host=self.host, port=self.port) + + # Send negotiation message + headers[req_header] = ( + 'NTLM %s' % ntlm.create_NTLM_NEGOTIATE_MESSAGE(self.rawuser)) + log.debug('Request headers: %s' % headers) + conn.request('GET', self.authurl, None, headers) + res = conn.getresponse() + reshdr = dict(res.getheaders()) + log.debug('Response status: %s %s' % (res.status, res.reason)) + log.debug('Response headers: %s' % reshdr) + log.debug('Response data: %s [...]' % res.read(100)) + + # Remove the reference to the socket, so that it can not be closed by + # the response object (we want to keep the socket open) + res.fp = None + + # Server should respond with a challenge message + auth_header_values = reshdr[resp_header].split(', ') + auth_header_value = None + for s in auth_header_values: + if s[:5] == 'NTLM ': + auth_header_value = s[5:] + if auth_header_value is None: + raise Exception('Unexpected %s response header: %s' % + (resp_header, reshdr[resp_header])) + + # Send authentication message + ServerChallenge, NegotiateFlags = \ + ntlm.parse_NTLM_CHALLENGE_MESSAGE(auth_header_value) + auth_msg = ntlm.create_NTLM_AUTHENTICATE_MESSAGE(ServerChallenge, + self.user, + self.domain, + self.pw, + NegotiateFlags) + headers[req_header] = 'NTLM %s' % auth_msg + log.debug('Request headers: %s' % headers) + conn.request('GET', self.authurl, None, headers) + res = conn.getresponse() + log.debug('Response status: %s %s' % (res.status, res.reason)) + log.debug('Response headers: %s' % dict(res.getheaders())) + log.debug('Response data: %s [...]' % res.read()[:100]) + if res.status != 200: + if res.status == 401: + raise Exception('Server rejected request: wrong ' + 'username or password') + raise Exception('Wrong server response: %s %s' % + (res.status, res.reason)) + + 
res.fp = None + log.debug('Connection established') + return conn + + def urlopen(self, method, url, body=None, headers=None, retries=3, + redirect=True, assert_same_host=True): + if headers is None: + headers = {} + headers['Connection'] = 'Keep-Alive' + return super(NTLMConnectionPool, self).urlopen(method, url, body, + headers, retries, + redirect, + assert_same_host) diff --git a/urllib3/contrib/pyopenssl.py b/urllib3/contrib/pyopenssl.py new file mode 100644 index 0000000..5c4c6d8 --- /dev/null +++ b/urllib3/contrib/pyopenssl.py @@ -0,0 +1,167 @@ +'''SSL with SNI-support for Python 2. + +This needs the following packages installed: + +* pyOpenSSL (tested with 0.13) +* ndg-httpsclient (tested with 0.3.2) +* pyasn1 (tested with 0.1.6) + +To activate it call :func:`~urllib3.contrib.pyopenssl.inject_into_urllib3`. +This can be done in a ``sitecustomize`` module, or at any other time before +your application begins using ``urllib3``, like this:: + + try: + import urllib3.contrib.pyopenssl + urllib3.contrib.pyopenssl.inject_into_urllib3() + except ImportError: + pass + +Now you can use :mod:`urllib3` as you normally would, and it will support SNI +when the required modules are installed. +''' + +from ndg.httpsclient.ssl_peer_verification import (ServerSSLCertVerification, + SUBJ_ALT_NAME_SUPPORT) +from ndg.httpsclient.subj_alt_name import SubjectAltName +import OpenSSL.SSL +from pyasn1.codec.der import decoder as der_decoder +from socket import _fileobject +import ssl + +from .. import connectionpool +from .. import util + +__all__ = ['inject_into_urllib3', 'extract_from_urllib3'] + +# SNI only *really* works if we can read the subjectAltName of certificates. +HAS_SNI = SUBJ_ALT_NAME_SUPPORT + +# Map from urllib3 to PyOpenSSL compatible parameter-values. 
+_openssl_versions = { + ssl.PROTOCOL_SSLv23: OpenSSL.SSL.SSLv23_METHOD, + ssl.PROTOCOL_SSLv3: OpenSSL.SSL.SSLv3_METHOD, + ssl.PROTOCOL_TLSv1: OpenSSL.SSL.TLSv1_METHOD, +} +_openssl_verify = { + ssl.CERT_NONE: OpenSSL.SSL.VERIFY_NONE, + ssl.CERT_OPTIONAL: OpenSSL.SSL.VERIFY_PEER, + ssl.CERT_REQUIRED: OpenSSL.SSL.VERIFY_PEER + + OpenSSL.SSL.VERIFY_FAIL_IF_NO_PEER_CERT, +} + + +orig_util_HAS_SNI = util.HAS_SNI +orig_connectionpool_ssl_wrap_socket = connectionpool.ssl_wrap_socket + + +def inject_into_urllib3(): + 'Monkey-patch urllib3 with PyOpenSSL-backed SSL-support.' + + connectionpool.ssl_wrap_socket = ssl_wrap_socket + util.HAS_SNI = HAS_SNI + + +def extract_from_urllib3(): + 'Undo monkey-patching by :func:`inject_into_urllib3`.' + + connectionpool.ssl_wrap_socket = orig_connectionpool_ssl_wrap_socket + util.HAS_SNI = orig_util_HAS_SNI + + +### Note: This is a slightly bug-fixed version of same from ndg-httpsclient. +def get_subj_alt_name(peer_cert): + # Search through extensions + dns_name = [] + if not SUBJ_ALT_NAME_SUPPORT: + return dns_name + + general_names = SubjectAltName() + for i in range(peer_cert.get_extension_count()): + ext = peer_cert.get_extension(i) + ext_name = ext.get_short_name() + if ext_name != 'subjectAltName': + continue + + # PyOpenSSL returns extension data in ASN.1 encoded form + ext_dat = ext.get_data() + decoded_dat = der_decoder.decode(ext_dat, + asn1Spec=general_names) + + for name in decoded_dat: + if not isinstance(name, SubjectAltName): + continue + for entry in range(len(name)): + component = name.getComponentByPosition(entry) + if component.getName() != 'dNSName': + continue + dns_name.append(str(component.getComponent())) + + return dns_name + + +class WrappedSocket(object): + '''API-compatibility wrapper for Python OpenSSL's Connection-class.''' + + def __init__(self, connection, socket): + self.connection = connection + self.socket = socket + + def makefile(self, mode, bufsize=-1): + return _fileobject(self.connection, mode, 
bufsize) + + def settimeout(self, timeout): + return self.socket.settimeout(timeout) + + def sendall(self, data): + return self.connection.sendall(data) + + def getpeercert(self, binary_form=False): + x509 = self.connection.get_peer_certificate() + if not x509: + raise ssl.SSLError('') + + if binary_form: + return OpenSSL.crypto.dump_certificate( + OpenSSL.crypto.FILETYPE_ASN1, + x509) + + return { + 'subject': ( + (('commonName', x509.get_subject().CN),), + ), + 'subjectAltName': [ + ('DNS', value) + for value in get_subj_alt_name(x509) + ] + } + + +def _verify_callback(cnx, x509, err_no, err_depth, return_code): + return err_no == 0 + + +def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, + ca_certs=None, server_hostname=None, + ssl_version=None): + ctx = OpenSSL.SSL.Context(_openssl_versions[ssl_version]) + if certfile: + ctx.use_certificate_file(certfile) + if keyfile: + ctx.use_privatekey_file(keyfile) + if cert_reqs != ssl.CERT_NONE: + ctx.set_verify(_openssl_verify[cert_reqs], _verify_callback) + if ca_certs: + try: + ctx.load_verify_locations(ca_certs, None) + except OpenSSL.SSL.Error as e: + raise ssl.SSLError('bad ca_certs: %r' % ca_certs, e) + + cnx = OpenSSL.SSL.Connection(ctx, sock) + cnx.set_tlsext_host_name(server_hostname) + cnx.set_connect_state() + try: + cnx.do_handshake() + except OpenSSL.SSL.Error as e: + raise ssl.SSLError('bad handshake', e) + + return WrappedSocket(cnx, sock) diff --git a/urllib3/exceptions.py b/urllib3/exceptions.py index 99ebb67..8dd76af 100644 --- a/urllib3/exceptions.py +++ b/urllib3/exceptions.py @@ -1,5 +1,5 @@ # urllib3/exceptions.py -# Copyright 2008-2012 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) # # This module is part of urllib3 and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php @@ -18,6 +18,10 @@ class PoolError(HTTPError): self.pool = pool HTTPError.__init__(self, 
"%s: %s" % (pool, message)) + def __reduce__(self): + # For pickling purposes. + return self.__class__, (None, self.url) + class SSLError(HTTPError): "Raised when SSL certificate fails in an HTTPS connection." @@ -34,10 +38,16 @@ class DecodeError(HTTPError): class MaxRetryError(PoolError): "Raised when the maximum number of retries is exceeded." - def __init__(self, pool, url): + def __init__(self, pool, url, reason=None): + self.reason = reason + message = "Max retries exceeded with url: %s" % url - PoolError.__init__(self, pool, message) + if reason: + message += " (Caused by %s: %s)" % (type(reason), reason) + else: + message += " (Caused by redirect)" + PoolError.__init__(self, pool, message) self.url = url @@ -72,6 +82,6 @@ class LocationParseError(ValueError, HTTPError): def __init__(self, location): message = "Failed to parse: %s" % location - super(LocationParseError, self).__init__(self, message) + HTTPError.__init__(self, message) self.location = location diff --git a/urllib3/filepost.py b/urllib3/filepost.py index e679b93..526a740 100644 --- a/urllib3/filepost.py +++ b/urllib3/filepost.py @@ -1,5 +1,5 @@ # urllib3/filepost.py -# Copyright 2008-2012 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) # # This module is part of urllib3 and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php @@ -41,13 +41,16 @@ def iter_fields(fields): def encode_multipart_formdata(fields, boundary=None): """ - Encode a dictionary of ``fields`` using the multipart/form-data mime format. + Encode a dictionary of ``fields`` using the multipart/form-data MIME format. :param fields: - Dictionary of fields or list of (key, value) field tuples. The key is - treated as the field name, and the value as the body of the form-data - bytes. If the value is a tuple of two elements, then the first element - is treated as the filename of the form-data section. 
+ Dictionary of fields or list of (key, value) or (key, value, MIME type) + field tuples. The key is treated as the field name, and the value as + the body of the form-data bytes. If the value is a tuple of two + elements, then the first element is treated as the filename of the + form-data section and a suitable MIME type is guessed based on the + filename. If the value is a tuple of three elements, then the third + element is treated as an explicit MIME type of the form-data section. Field names and filenames must be unicode. @@ -63,16 +66,20 @@ def encode_multipart_formdata(fields, boundary=None): body.write(b('--%s\r\n' % (boundary))) if isinstance(value, tuple): - filename, data = value + if len(value) == 3: + filename, data, content_type = value + else: + filename, data = value + content_type = get_content_type(filename) writer(body).write('Content-Disposition: form-data; name="%s"; ' 'filename="%s"\r\n' % (fieldname, filename)) body.write(b('Content-Type: %s\r\n\r\n' % - (get_content_type(filename)))) + (content_type,))) else: data = value writer(body).write('Content-Disposition: form-data; name="%s"\r\n' % (fieldname)) - body.write(b'Content-Type: text/plain\r\n\r\n') + body.write(b'\r\n') if isinstance(data, int): data = str(data) # Backwards compatibility @@ -86,6 +93,6 @@ def encode_multipart_formdata(fields, boundary=None): body.write(b('--%s--\r\n' % (boundary))) - content_type = b('multipart/form-data; boundary=%s' % boundary) + content_type = str('multipart/form-data; boundary=%s' % boundary) return body.getvalue(), content_type diff --git a/urllib3/packages/six.py b/urllib3/packages/six.py index a64f6fb..27d8011 100644 --- a/urllib3/packages/six.py +++ b/urllib3/packages/six.py @@ -24,7 +24,7 @@ import sys import types __author__ = "Benjamin Peterson " -__version__ = "1.1.0" +__version__ = "1.2.0" # Revision 41c74fef2ded # True if we are running on Python 3. 
@@ -45,19 +45,23 @@ else: text_type = unicode binary_type = str - # It's possible to have sizeof(long) != sizeof(Py_ssize_t). - class X(object): - def __len__(self): - return 1 << 31 - try: - len(X()) - except OverflowError: - # 32-bit + if sys.platform.startswith("java"): + # Jython always uses 32 bits. MAXSIZE = int((1 << 31) - 1) else: - # 64-bit - MAXSIZE = int((1 << 63) - 1) - del X + # It's possible to have sizeof(long) != sizeof(Py_ssize_t). + class X(object): + def __len__(self): + return 1 << 31 + try: + len(X()) + except OverflowError: + # 32-bit + MAXSIZE = int((1 << 31) - 1) + else: + # 64-bit + MAXSIZE = int((1 << 63) - 1) + del X def _add_doc(func, doc): @@ -132,6 +136,7 @@ class _MovedItems(types.ModuleType): _moved_attributes = [ MovedAttribute("cStringIO", "cStringIO", "io", "StringIO"), MovedAttribute("filter", "itertools", "builtins", "ifilter", "filter"), + MovedAttribute("input", "__builtin__", "builtins", "raw_input", "input"), MovedAttribute("map", "itertools", "builtins", "imap", "map"), MovedAttribute("reload_module", "__builtin__", "imp", "reload"), MovedAttribute("reduce", "__builtin__", "functools"), @@ -178,7 +183,7 @@ for attr in _moved_attributes: setattr(_MovedItems, attr.name, attr) del attr -moves = sys.modules["six.moves"] = _MovedItems("moves") +moves = sys.modules[__name__ + ".moves"] = _MovedItems("moves") def add_move(move): @@ -219,12 +224,19 @@ else: _iteritems = "iteritems" +try: + advance_iterator = next +except NameError: + def advance_iterator(it): + return it.next() +next = advance_iterator + + if PY3: def get_unbound_function(unbound): return unbound - - advance_iterator = next + Iterator = object def callable(obj): return any("__call__" in klass.__dict__ for klass in type(obj).__mro__) @@ -232,9 +244,10 @@ else: def get_unbound_function(unbound): return unbound.im_func + class Iterator(object): - def advance_iterator(it): - return it.next() + def next(self): + return type(self).__next__(self) callable = callable 
_add_doc(get_unbound_function, @@ -249,15 +262,15 @@ get_function_defaults = operator.attrgetter(_func_defaults) def iterkeys(d): """Return an iterator over the keys of a dictionary.""" - return getattr(d, _iterkeys)() + return iter(getattr(d, _iterkeys)()) def itervalues(d): """Return an iterator over the values of a dictionary.""" - return getattr(d, _itervalues)() + return iter(getattr(d, _itervalues)()) def iteritems(d): """Return an iterator over the (key, value) pairs of a dictionary.""" - return getattr(d, _iteritems)() + return iter(getattr(d, _iteritems)()) if PY3: diff --git a/urllib3/poolmanager.py b/urllib3/poolmanager.py index 8f5b54c..ce0c248 100644 --- a/urllib3/poolmanager.py +++ b/urllib3/poolmanager.py @@ -1,5 +1,5 @@ # urllib3/poolmanager.py -# Copyright 2008-2012 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) # # This module is part of urllib3 and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php @@ -23,6 +23,9 @@ pool_classes_by_scheme = { log = logging.getLogger(__name__) +SSL_KEYWORDS = ('key_file', 'cert_file', 'cert_reqs', 'ca_certs', + 'ssl_version') + class PoolManager(RequestMethods): """ @@ -30,8 +33,12 @@ class PoolManager(RequestMethods): necessary connection pools for you. :param num_pools: - Number of connection pools to cache before discarding the least recently - used pool. + Number of connection pools to cache before discarding the least + recently used pool. + + :param headers: + Headers to include with all requests, unless other headers are given + explicitly. 
:param \**connection_pool_kw: Additional parameters are used to create fresh @@ -40,19 +47,37 @@ class PoolManager(RequestMethods): Example: :: >>> manager = PoolManager(num_pools=2) - >>> r = manager.urlopen("http://google.com/") - >>> r = manager.urlopen("http://google.com/mail") - >>> r = manager.urlopen("http://yahoo.com/") + >>> r = manager.request('GET', 'http://google.com/') + >>> r = manager.request('GET', 'http://google.com/mail') + >>> r = manager.request('GET', 'http://yahoo.com/') >>> len(manager.pools) 2 """ - def __init__(self, num_pools=10, **connection_pool_kw): + def __init__(self, num_pools=10, headers=None, **connection_pool_kw): + RequestMethods.__init__(self, headers) self.connection_pool_kw = connection_pool_kw self.pools = RecentlyUsedContainer(num_pools, dispose_func=lambda p: p.close()) + def _new_pool(self, scheme, host, port): + """ + Create a new :class:`ConnectionPool` based on host, port and scheme. + + This method is used to actually create the connection pools handed out + by :meth:`connection_from_url` and companion methods. It is intended + to be overridden for customization. + """ + pool_cls = pool_classes_by_scheme[scheme] + kwargs = self.connection_pool_kw + if scheme == 'http': + kwargs = self.connection_pool_kw.copy() + for kw in SSL_KEYWORDS: + kwargs.pop(kw, None) + + return pool_cls(host, port, **kwargs) + def clear(self): """ Empty our store of pools and direct them all to close. @@ -69,6 +94,7 @@ class PoolManager(RequestMethods): If ``port`` isn't given, it will be derived from the ``scheme`` using ``urllib3.connectionpool.port_by_scheme``. 
""" + scheme = scheme or 'http' port = port or port_by_scheme.get(scheme, 80) pool_key = (scheme, host, port) @@ -80,11 +106,8 @@ class PoolManager(RequestMethods): return pool # Make a fresh ConnectionPool of the desired type - pool_cls = pool_classes_by_scheme[scheme] - pool = pool_cls(host, port, **self.connection_pool_kw) - + pool = self._new_pool(scheme, host, port) self.pools[pool_key] = pool - return pool def connection_from_url(self, url): @@ -113,6 +136,8 @@ class PoolManager(RequestMethods): kw['assert_same_host'] = False kw['redirect'] = False + if 'headers' not in kw: + kw['headers'] = self.headers response = conn.urlopen(method, u.request_uri, **kw) @@ -124,32 +149,41 @@ class PoolManager(RequestMethods): method = 'GET' log.info("Redirecting %s -> %s" % (url, redirect_location)) - kw['retries'] = kw.get('retries', 3) - 1 # Persist retries countdown + kw['retries'] = kw.get('retries', 3) - 1 # Persist retries countdown + kw['redirect'] = redirect return self.urlopen(method, redirect_location, **kw) class ProxyManager(RequestMethods): """ Given a ConnectionPool to a proxy, the ProxyManager's ``urlopen`` method - will make requests to any url through the defined proxy. + will make requests to any url through the defined proxy. The ProxyManager + class will automatically set the 'Host' header if it is not provided. """ def __init__(self, proxy_pool): self.proxy_pool = proxy_pool - def _set_proxy_headers(self, headers=None): - headers = headers or {} + def _set_proxy_headers(self, url, headers=None): + """ + Sets headers needed by proxies: specifically, the Accept and Host + headers. Only sets headers not provided by the user. 
+ """ + headers_ = {'Accept': '*/*'} + + host = parse_url(url).host + if host: + headers_['Host'] = host - # Same headers are curl passes for --proxy1.0 - headers['Accept'] = '*/*' - headers['Proxy-Connection'] = 'Keep-Alive' + if headers: + headers_.update(headers) - return headers + return headers_ def urlopen(self, method, url, **kw): "Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute." kw['assert_same_host'] = False - kw['headers'] = self._set_proxy_headers(kw.get('headers')) + kw['headers'] = self._set_proxy_headers(url, headers=kw.get('headers')) return self.proxy_pool.urlopen(method, url, **kw) diff --git a/urllib3/request.py b/urllib3/request.py index 569ac96..bf0256e 100644 --- a/urllib3/request.py +++ b/urllib3/request.py @@ -1,5 +1,5 @@ # urllib3/request.py -# Copyright 2008-2012 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) # # This module is part of urllib3 and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php @@ -36,12 +36,20 @@ class RequestMethods(object): :meth:`.request` is for making any kind of request, it will look up the appropriate encoding format and use one of the above two methods to make the request. + + Initializer parameters: + + :param headers: + Headers to include with all requests, unless other headers are given + explicitly. """ _encode_url_methods = set(['DELETE', 'GET', 'HEAD', 'OPTIONS']) - _encode_body_methods = set(['PATCH', 'POST', 'PUT', 'TRACE']) + def __init__(self, headers=None): + self.headers = headers or {} + def urlopen(self, method, url, body=None, headers=None, encode_multipart=True, multipart_boundary=None, **kw): # Abstract @@ -97,13 +105,16 @@ class RequestMethods(object): such as with OAuth. Supports an optional ``fields`` parameter of key/value strings AND - key/filetuple. A filetuple is a (filename, data) tuple. For example: :: + key/filetuple. 
A filetuple is a (filename, data, MIME type) tuple where + the MIME type is optional. For example: :: fields = { 'foo': 'bar', 'fakefile': ('foofile.txt', 'contents of foofile'), 'realfile': ('barfile.txt', open('realfile').read()), - 'nonamefile': ('contents of nonamefile field'), + 'typedfile': ('bazfile.bin', open('bazfile').read(), + 'image/jpeg'), + 'nonamefile': 'contents of nonamefile field', } When uploading a file, providing a filename (the first parameter of the @@ -121,8 +132,11 @@ class RequestMethods(object): body, content_type = (urlencode(fields or {}), 'application/x-www-form-urlencoded') - headers = headers or {} - headers.update({'Content-Type': content_type}) + if headers is None: + headers = self.headers + + headers_ = {'Content-Type': content_type} + headers_.update(headers) - return self.urlopen(method, url, body=body, headers=headers, + return self.urlopen(method, url, body=body, headers=headers_, **urlopen_kw) diff --git a/urllib3/response.py b/urllib3/response.py index 28537d3..1685760 100644 --- a/urllib3/response.py +++ b/urllib3/response.py @@ -1,32 +1,51 @@ # urllib3/response.py -# Copyright 2008-2012 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) # # This module is part of urllib3 and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php -import gzip + import logging import zlib -from io import BytesIO - from .exceptions import DecodeError -from .packages.six import string_types as basestring +from .packages.six import string_types as basestring, binary_type log = logging.getLogger(__name__) -def decode_gzip(data): - gzipper = gzip.GzipFile(fileobj=BytesIO(data)) - return gzipper.read() +class DeflateDecoder(object): + + def __init__(self): + self._first_try = True + self._data = binary_type() + self._obj = zlib.decompressobj() + def __getattr__(self, name): + return getattr(self._obj, name) -def decode_deflate(data): - 
try: - return zlib.decompress(data) - except zlib.error: - return zlib.decompress(data, -zlib.MAX_WBITS) + def decompress(self, data): + if not self._first_try: + return self._obj.decompress(data) + + self._data += data + try: + return self._obj.decompress(data) + except zlib.error: + self._first_try = False + self._obj = zlib.decompressobj(-zlib.MAX_WBITS) + try: + return self.decompress(self._data) + finally: + self._data = None + + +def _get_decoder(mode): + if mode == 'gzip': + return zlib.decompressobj(16 + zlib.MAX_WBITS) + + return DeflateDecoder() class HTTPResponse(object): @@ -52,10 +71,7 @@ class HTTPResponse(object): otherwise unused. """ - CONTENT_DECODERS = { - 'gzip': decode_gzip, - 'deflate': decode_deflate, - } + CONTENT_DECODERS = ['gzip', 'deflate'] def __init__(self, body='', headers=None, status=0, version=0, reason=None, strict=0, preload_content=True, decode_content=True, @@ -65,8 +81,9 @@ class HTTPResponse(object): self.version = version self.reason = reason self.strict = strict + self.decode_content = decode_content - self._decode_content = decode_content + self._decoder = None self._body = body if body and isinstance(body, basestring) else None self._fp = None self._original_response = original_response @@ -115,13 +132,13 @@ class HTTPResponse(object): parameters: ``decode_content`` and ``cache_content``. :param amt: - How much of the content to read. If specified, decoding and caching - is skipped because we can't decode partial content nor does it make - sense to cache partial content as the full response. + How much of the content to read. If specified, caching is skipped + because it doesn't make sense to cache partial content as the full + response. :param decode_content: If True, will attempt to decode the body based on the - 'content-encoding' header. (Overridden if ``amt`` is set.) + 'content-encoding' header. 
:param cache_content: If True, will save the returned data such that the same result is @@ -130,28 +147,50 @@ class HTTPResponse(object): after having ``.read()`` the file object. (Overridden if ``amt`` is set.) """ - content_encoding = self.headers.get('content-encoding') - decoder = self.CONTENT_DECODERS.get(content_encoding) + # Note: content-encoding value should be case-insensitive, per RFC 2616 + # Section 3.5 + content_encoding = self.headers.get('content-encoding', '').lower() + if self._decoder is None: + if content_encoding in self.CONTENT_DECODERS: + self._decoder = _get_decoder(content_encoding) if decode_content is None: - decode_content = self._decode_content + decode_content = self.decode_content if self._fp is None: return + flush_decoder = False + try: if amt is None: # cStringIO doesn't like amt=None data = self._fp.read() + flush_decoder = True else: - return self._fp.read(amt) + cache_content = False + data = self._fp.read(amt) + if amt != 0 and not data: # Platform-specific: Buggy versions of Python. + # Close the connection when no data is returned + # + # This is redundant to what httplib/http.client _should_ + # already do. However, versions of python released before + # December 15, 2012 (http://bugs.python.org/issue16298) do not + # properly close the connection in all cases. There is no harm + # in redundantly calling close. + self._fp.close() + flush_decoder = True try: - if decode_content and decoder: - data = decoder(data) + if decode_content and self._decoder: + data = self._decoder.decompress(data) except (IOError, zlib.error): raise DecodeError("Received response with content-encoding: %s, but " "failed to decode it." 
% content_encoding) + if flush_decoder and self._decoder: + buf = self._decoder.decompress(binary_type()) + data += buf + self._decoder.flush() + if cache_content: self._body = data diff --git a/urllib3/util.py b/urllib3/util.py index 8ec990b..544f9ed 100644 --- a/urllib3/util.py +++ b/urllib3/util.py @@ -1,5 +1,5 @@ # urllib3/util.py -# Copyright 2008-2012 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) # # This module is part of urllib3 and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php @@ -8,18 +8,32 @@ from base64 import b64encode from collections import namedtuple from socket import error as SocketError +from hashlib import md5, sha1 +from binascii import hexlify, unhexlify try: from select import poll, POLLIN -except ImportError: # `poll` doesn't exist on OSX and other platforms +except ImportError: # `poll` doesn't exist on OSX and other platforms poll = False try: from select import select - except ImportError: # `select` doesn't exist on AppEngine. + except ImportError: # `select` doesn't exist on AppEngine. select = False +try: # Test for SSL features + SSLContext = None + HAS_SNI = False + + import ssl + from ssl import wrap_socket, CERT_NONE, PROTOCOL_SSLv23 + from ssl import SSLContext # Modern SSL? + from ssl import HAS_SNI # Has SNI? +except ImportError: + pass + + from .packages import six -from .exceptions import LocationParseError +from .exceptions import LocationParseError, SSLError class Url(namedtuple('Url', ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment'])): @@ -92,9 +106,9 @@ def parse_url(url): >>> parse_url('http://google.com/mail/') Url(scheme='http', host='google.com', port=None, path='/', ...) - >>> prase_url('google.com:80') + >>> parse_url('google.com:80') Url(scheme=None, host='google.com', port=80, path=None, ...) 
- >>> prase_url('/foo?bar') + >>> parse_url('/foo?bar') Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...) """ @@ -220,7 +234,7 @@ def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, return headers -def is_connection_dropped(conn): +def is_connection_dropped(conn): # Platform-specific """ Returns True if the connection is dropped and should be closed. @@ -234,7 +248,7 @@ def is_connection_dropped(conn): if not sock: # Platform-specific: AppEngine return False - if not poll: # Platform-specific + if not poll: if not select: # Platform-specific: AppEngine return False @@ -250,3 +264,115 @@ def is_connection_dropped(conn): if fno == sock.fileno(): # Either data is buffered (bad), or the connection is dropped. return True + + +def resolve_cert_reqs(candidate): + """ + Resolves the argument to a numeric constant, which can be passed to + the wrap_socket function/method from the ssl module. + Defaults to :data:`ssl.CERT_NONE`. + If given a string it is assumed to be the name of the constant in the + :mod:`ssl` module or its abbrevation. + (So you can specify `REQUIRED` instead of `CERT_REQUIRED`. + If it's neither `None` nor a string we assume it is already the numeric + constant which can directly be passed to wrap_socket. + """ + if candidate is None: + return CERT_NONE + + if isinstance(candidate, str): + res = getattr(ssl, candidate, None) + if res is None: + res = getattr(ssl, 'CERT_' + candidate) + return res + + return candidate + + +def resolve_ssl_version(candidate): + """ + like resolve_cert_reqs + """ + if candidate is None: + return PROTOCOL_SSLv23 + + if isinstance(candidate, str): + res = getattr(ssl, candidate, None) + if res is None: + res = getattr(ssl, 'PROTOCOL_' + candidate) + return res + + return candidate + + +def assert_fingerprint(cert, fingerprint): + """ + Checks if given fingerprint matches the supplied certificate. + + :param cert: + Certificate as bytes object. 
+ :param fingerprint: + Fingerprint as string of hexdigits, can be interspersed by colons. + """ + + # Maps the length of a digest to a possible hash function producing + # this digest. + hashfunc_map = { + 16: md5, + 20: sha1 + } + + fingerprint = fingerprint.replace(':', '').lower() + + digest_length, rest = divmod(len(fingerprint), 2) + + if rest or digest_length not in hashfunc_map: + raise SSLError('Fingerprint is of invalid length.') + + # We need encode() here for py32; works on py2 and p33. + fingerprint_bytes = unhexlify(fingerprint.encode()) + + hashfunc = hashfunc_map[digest_length] + + cert_digest = hashfunc(cert).digest() + + if not cert_digest == fingerprint_bytes: + raise SSLError('Fingerprints did not match. Expected "{0}", got "{1}".' + .format(hexlify(fingerprint_bytes), + hexlify(cert_digest))) + + +if SSLContext is not None: # Python 3.2+ + def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, + ca_certs=None, server_hostname=None, + ssl_version=None): + """ + All arguments except `server_hostname` have the same meaning as for + :func:`ssl.wrap_socket` + + :param server_hostname: + Hostname of the expected certificate + """ + context = SSLContext(ssl_version) + context.verify_mode = cert_reqs + if ca_certs: + try: + context.load_verify_locations(ca_certs) + # Py32 raises IOError + # Py33 raises FileNotFoundError + except Exception as e: # Reraise as SSLError + raise SSLError(e) + if certfile: + # FIXME: This block needs a test. 
+ context.load_cert_chain(certfile, keyfile) + if HAS_SNI: # Platform-specific: OpenSSL with enabled SNI + return context.wrap_socket(sock, server_hostname=server_hostname) + return context.wrap_socket(sock) + +else: # Python 3.1 and earlier + def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, + ca_certs=None, server_hostname=None, + ssl_version=None): + return wrap_socket(sock, keyfile=keyfile, certfile=certfile, + ca_certs=ca_certs, cert_reqs=cert_reqs, + ssl_version=ssl_version) -- cgit v1.2.3 From 52980ebd0a4eb75acf055a2256e095772c1fa7c6 Mon Sep 17 00:00:00 2001 From: SVN-Git Migration Date: Thu, 8 Oct 2015 13:19:35 -0700 Subject: Imported Upstream version 1.7.1 --- CHANGES.rst | 48 ++++ CONTRIBUTORS.txt | 30 +++ PKG-INFO | 70 +++++- README.rst | 18 +- dummyserver/handlers.py | 57 ++++- dummyserver/proxy.py | 137 ++++++++++++ dummyserver/server.py | 50 +++-- dummyserver/testcase.py | 86 +++++-- setup.cfg | 1 + test-requirements.txt | 5 +- test/__init__.py | 0 test/benchmark.py | 77 +++++++ test/test_connectionpool.py | 29 ++- test/test_exceptions.py | 30 ++- test/test_fields.py | 44 ++++ test/test_filepost.py | 14 ++ test/test_proxymanager.py | 18 +- test/test_response.py | 135 ++++++++++- test/test_util.py | 155 +++++++++++-- urllib3.egg-info/PKG-INFO | 70 +++++- urllib3.egg-info/SOURCES.txt | 5 + urllib3/__init__.py | 4 +- urllib3/_collections.py | 16 +- urllib3/connectionpool.py | 286 ++++++++++++++++++------ urllib3/contrib/ntlmpool.py | 2 +- urllib3/contrib/pyopenssl.py | 193 +++++++++++++++- urllib3/exceptions.py | 54 ++++- urllib3/fields.py | 177 +++++++++++++++ urllib3/filepost.py | 57 ++--- urllib3/packages/ssl_match_hostname/__init__.py | 67 ++++-- urllib3/poolmanager.py | 121 +++++++--- urllib3/request.py | 2 +- urllib3/response.py | 72 +++++- urllib3/util.py | 260 ++++++++++++++++++++- 34 files changed, 2112 insertions(+), 278 deletions(-) create mode 100755 dummyserver/proxy.py create mode 100644 test/__init__.py create mode 
100644 test/benchmark.py create mode 100644 test/test_fields.py create mode 100644 urllib3/fields.py diff --git a/CHANGES.rst b/CHANGES.rst index a2a0da8..891fd79 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,54 @@ Changes ======= +1.7.1 (2013-09-25) +++++++++++++++++++ + +* Added granular timeout support with new `urllib3.util.Timeout` class. + (Issue #231) + +* Fixed Python 3.4 support. (Issue #238) + + +1.7 (2013-08-14) +++++++++++++++++ + +* More exceptions are now pickle-able, with tests. (Issue #174) + +* Fixed redirecting with relative URLs in Location header. (Issue #178) + +* Support for relative urls in ``Location: ...`` header. (Issue #179) + +* ``urllib3.response.HTTPResponse`` now inherits from ``io.IOBase`` for bonus + file-like functionality. (Issue #187) + +* Passing ``assert_hostname=False`` when creating a HTTPSConnectionPool will + skip hostname verification for SSL connections. (Issue #194) + +* New method ``urllib3.response.HTTPResponse.stream(...)`` which acts as a + generator wrapped around ``.read(...)``. (Issue #198) + +* IPv6 url parsing enforces brackets around the hostname. (Issue #199) + +* Fixed thread race condition in + ``urllib3.poolmanager.PoolManager.connection_from_host(...)`` (Issue #204) + +* ``ProxyManager`` requests now include non-default port in ``Host: ...`` + header. (Issue #217) + +* Added HTTPS proxy support in ``ProxyManager``. (Issue #170 #139) + +* New ``RequestField`` object can be passed to the ``fields=...`` param which + can specify headers. (Issue #220) + +* Raise ``urllib3.exceptions.ProxyError`` when connecting to proxy fails. + (Issue #221) + +* Use international headers when posting file names. (Issue #119) + +* Improved IPv6 support. 
(Issue #203) + + 1.6 (2013-04-25) ++++++++++++++++ diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt index 19f59ce..e1aca42 100644 --- a/CONTRIBUTORS.txt +++ b/CONTRIBUTORS.txt @@ -60,5 +60,35 @@ In chronological order: * Marc Schlaich * Various bugfixes and test improvements. +* Bryce Boe + * Correct six.moves conflict + * Fixed pickle support of some exceptions + +* Boris Figovsky + * Allowed to skip SSL hostname verification + +* Cory Benfield + * Stream method for Response objects. + * Return native strings in header values. + * Generate 'Host' header when using proxies. + +* Jason Robinson + * Add missing WrappedSocket.fileno method in PyOpenSSL + +* Audrius Butkevicius + * Fixed a race condition + +* Stanislav Vitkovskiy + * Added HTTPS (CONNECT) proxy support + +* Stephen Holsapple + * Added abstraction for granular control of request fields + +* Martin von Gagern + * Support for non-ASCII header parameters + +* Kevin Burke and Pavel Kirichenko + * Support for separate connect and request timeouts + * [Your name or handle] <[email or website]> * [Brief summary of your changes] diff --git a/PKG-INFO b/PKG-INFO index 661e33b..a81ab9c 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,6 +1,6 @@ -Metadata-Version: 1.1 +Metadata-Version: 1.0 Name: urllib3 -Version: 1.6 +Version: 1.7.1 Summary: HTTP library with thread-safe connection pooling, file post, and more. Home-page: http://urllib3.readthedocs.org/ Author: Andrey Petrov @@ -25,7 +25,7 @@ Description: ======= - Supports gzip and deflate decoding. - Thread-safe and sanity-safe. - Works with AppEngine, gevent, and eventlib. - - Tested on Python 2.6+ and Python 3.3+, 100% unit test coverage. + - Tested on Python 2.6+ and Python 3.2+, 100% unit test coverage. - Small and easy to understand codebase perfect for extending and building upon. For a more comprehensive solution, have a look at `Requests `_ which is also powered by urllib3. 
@@ -89,14 +89,18 @@ Description: ======= Run the tests ============= - We use some external dependencies to run the urllib3 test suite. Easiest way to - run the tests is thusly from the urllib3 source root: :: + We use some external dependencies, multiple interpreters and code coverage + analysis while running test suite. Easiest way to run the tests is thusly the + ``tox`` utility: :: - $ pip install -r test-requirements.txt - $ nosetests - ..................................................... + $ tox + # [..] + py26: commands succeeded + py27: commands succeeded + py32: commands succeeded + py33: commands succeeded - Success! You could also ``pip install coverage`` to get code coverage reporting. + Note that code coverage less than 100% is regarded as a failing run. Contributing @@ -117,6 +121,54 @@ Description: ======= Changes ======= + 1.7.1 (2013-09-25) + ++++++++++++++++++ + + * Added granular timeout support with new `urllib3.util.Timeout` class. + (Issue #231) + + * Fixed Python 3.4 support. (Issue #238) + + + 1.7 (2013-08-14) + ++++++++++++++++ + + * More exceptions are now pickle-able, with tests. (Issue #174) + + * Fixed redirecting with relative URLs in Location header. (Issue #178) + + * Support for relative urls in ``Location: ...`` header. (Issue #179) + + * ``urllib3.response.HTTPResponse`` now inherits from ``io.IOBase`` for bonus + file-like functionality. (Issue #187) + + * Passing ``assert_hostname=False`` when creating a HTTPSConnectionPool will + skip hostname verification for SSL connections. (Issue #194) + + * New method ``urllib3.response.HTTPResponse.stream(...)`` which acts as a + generator wrapped around ``.read(...)``. (Issue #198) + + * IPv6 url parsing enforces brackets around the hostname. (Issue #199) + + * Fixed thread race condition in + ``urllib3.poolmanager.PoolManager.connection_from_host(...)`` (Issue #204) + + * ``ProxyManager`` requests now include non-default port in ``Host: ...`` + header. 
(Issue #217) + + * Added HTTPS proxy support in ``ProxyManager``. (Issue #170 #139) + + * New ``RequestField`` object can be passed to the ``fields=...`` param which + can specify headers. (Issue #220) + + * Raise ``urllib3.exceptions.ProxyError`` when connecting to proxy fails. + (Issue #221) + + * Use international headers when posting file names. (Issue #119) + + * Improved IPv6 support. (Issue #203) + + 1.6 (2013-04-25) ++++++++++++++++ diff --git a/README.rst b/README.rst index 75f05d8..b126647 100644 --- a/README.rst +++ b/README.rst @@ -17,7 +17,7 @@ Highlights - Supports gzip and deflate decoding. - Thread-safe and sanity-safe. - Works with AppEngine, gevent, and eventlib. -- Tested on Python 2.6+ and Python 3.3+, 100% unit test coverage. +- Tested on Python 2.6+ and Python 3.2+, 100% unit test coverage. - Small and easy to understand codebase perfect for extending and building upon. For a more comprehensive solution, have a look at `Requests `_ which is also powered by urllib3. @@ -81,14 +81,18 @@ pools, you should look at Run the tests ============= -We use some external dependencies to run the urllib3 test suite. Easiest way to -run the tests is thusly from the urllib3 source root: :: +We use some external dependencies, multiple interpreters and code coverage +analysis while running test suite. Easiest way to run the tests is thusly the +``tox`` utility: :: - $ pip install -r test-requirements.txt - $ nosetests - ..................................................... + $ tox + # [..] + py26: commands succeeded + py27: commands succeeded + py32: commands succeeded + py33: commands succeeded -Success! You could also ``pip install coverage`` to get code coverage reporting. +Note that code coverage less than 100% is regarded as a failing run. 
Contributing diff --git a/dummyserver/handlers.py b/dummyserver/handlers.py index ab48b53..bc51f31 100644 --- a/dummyserver/handlers.py +++ b/dummyserver/handlers.py @@ -87,7 +87,7 @@ class TestingApp(WSGIHandler): if request.method != method: return Response("Wrong method: %s != %s" % - (method, request.method), status='400') + (method, request.method), status='400 Bad Request') return Response() def upload(self, request): @@ -100,17 +100,18 @@ class TestingApp(WSGIHandler): if len(files_) != 1: return Response("Expected 1 file for '%s', not %d" %(param, len(files_)), - status='400') + status='400 Bad Request') file_ = files_[0] data = file_['body'] if int(size) != len(data): return Response("Wrong size: %d != %d" % - (size, len(data)), status='400') + (size, len(data)), status='400 Bad Request') if filename != file_['filename']: return Response("Wrong filename: %s != %s" % - (filename, file_.filename), status='400') + (filename, file_.filename), + status='400 Bad Request') return Response() @@ -118,7 +119,7 @@ class TestingApp(WSGIHandler): "Perform a redirect to ``target``" target = request.params.get('target', '/') headers = [('Location', target)] - return Response(status='303', headers=headers) + return Response(status='303 See Other', headers=headers) def keepalive(self, request): if request.params.get('close', b'0') == b'1': @@ -169,3 +170,49 @@ class TestingApp(WSGIHandler): def shutdown(self, request): sys.exit() + + +# RFC2231-aware replacement of internal tornado function +def _parse_header(line): + r"""Parse a Content-type like header. + + Return the main content-type and a dictionary of options. 
+ + >>> d = _parse_header("CD: fd; foo=\"bar\"; file*=utf-8''T%C3%A4st")[1] + >>> d['file'] == 'T\u00e4st' + True + >>> d['foo'] + 'bar' + """ + import tornado.httputil + import email.utils + from urllib3.packages import six + if not six.PY3: + line = line.encode('utf-8') + parts = tornado.httputil._parseparam(';' + line) + key = next(parts) + # decode_params treats first argument special, but we already stripped key + params = [('Dummy', 'value')] + for p in parts: + i = p.find('=') + if i >= 0: + name = p[:i].strip().lower() + value = p[i + 1:].strip() + params.append((name, value)) + params = email.utils.decode_params(params) + params.pop(0) # get rid of the dummy again + pdict = {} + for name, value in params: + print(repr(value)) + value = email.utils.collapse_rfc2231_value(value) + if len(value) >= 2 and value[0] == '"' and value[-1] == '"': + value = value[1:-1] + pdict[name] = value + return key, pdict + +# TODO: make the following conditional as soon as we know a version +# which does not require this fix. +# See https://github.com/facebook/tornado/issues/868 +if True: + import tornado.httputil + tornado.httputil._parse_header = _parse_header diff --git a/dummyserver/proxy.py b/dummyserver/proxy.py new file mode 100755 index 0000000..aca92a7 --- /dev/null +++ b/dummyserver/proxy.py @@ -0,0 +1,137 @@ +#!/usr/bin/env python +# +# Simple asynchronous HTTP proxy with tunnelling (CONNECT). 
+# +# GET/POST proxying based on +# http://groups.google.com/group/python-tornado/msg/7bea08e7a049cf26 +# +# Copyright (C) 2012 Senko Rasic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
+ +import sys +import socket + +import tornado.httpserver +import tornado.ioloop +import tornado.iostream +import tornado.web +import tornado.httpclient + +__all__ = ['ProxyHandler', 'run_proxy'] + + +class ProxyHandler(tornado.web.RequestHandler): + SUPPORTED_METHODS = ['GET', 'POST', 'CONNECT'] + + @tornado.web.asynchronous + def get(self): + + def handle_response(response): + if response.error and not isinstance(response.error, + tornado.httpclient.HTTPError): + self.set_status(500) + self.write('Internal server error:\n' + str(response.error)) + self.finish() + else: + self.set_status(response.code) + for header in ('Date', 'Cache-Control', 'Server', + 'Content-Type', 'Location'): + v = response.headers.get(header) + if v: + self.set_header(header, v) + if response.body: + self.write(response.body) + self.finish() + + req = tornado.httpclient.HTTPRequest(url=self.request.uri, + method=self.request.method, body=self.request.body, + headers=self.request.headers, follow_redirects=False, + allow_nonstandard_methods=True) + + client = tornado.httpclient.AsyncHTTPClient() + try: + client.fetch(req, handle_response) + except tornado.httpclient.HTTPError as e: + if hasattr(e, 'response') and e.response: + self.handle_response(e.response) + else: + self.set_status(500) + self.write('Internal server error:\n' + str(e)) + self.finish() + + @tornado.web.asynchronous + def post(self): + return self.get() + + @tornado.web.asynchronous + def connect(self): + host, port = self.request.uri.split(':') + client = self.request.connection.stream + + def read_from_client(data): + upstream.write(data) + + def read_from_upstream(data): + client.write(data) + + def client_close(data=None): + if upstream.closed(): + return + if data: + upstream.write(data) + upstream.close() + + def upstream_close(data=None): + if client.closed(): + return + if data: + client.write(data) + client.close() + + def start_tunnel(): + client.read_until_close(client_close, read_from_client) + 
upstream.read_until_close(upstream_close, read_from_upstream) + client.write(b'HTTP/1.0 200 Connection established\r\n\r\n') + + s = socket.socket(socket.AF_INET, socket.SOCK_STREAM, 0) + upstream = tornado.iostream.IOStream(s) + upstream.connect((host, int(port)), start_tunnel) + + +def run_proxy(port, start_ioloop=True): + """ + Run proxy on the specified port. If start_ioloop is True (default), + the tornado IOLoop will be started immediately. + """ + app = tornado.web.Application([ + (r'.*', ProxyHandler), + ]) + app.listen(port) + ioloop = tornado.ioloop.IOLoop.instance() + if start_ioloop: + ioloop.start() + +if __name__ == '__main__': + port = 8888 + if len(sys.argv) > 1: + port = int(sys.argv[1]) + + print ("Starting HTTP proxy on port %d" % port) + run_proxy(port) diff --git a/dummyserver/server.py b/dummyserver/server.py index 9031664..f4f98a4 100755 --- a/dummyserver/server.py +++ b/dummyserver/server.py @@ -11,11 +11,14 @@ import sys import threading import socket +from tornado import netutil import tornado.wsgi import tornado.httpserver import tornado.ioloop +import tornado.web from dummyserver.handlers import TestingApp +from dummyserver.proxy import ProxyHandler log = logging.getLogger(__name__) @@ -36,28 +39,29 @@ class SocketServerThread(threading.Thread): """ :param socket_handler: Callable which receives a socket argument for one request. - :param ready_lock: Lock which gets released when the socket handler is + :param ready_event: Event which gets set when the socket handler is ready to receive requests. 
""" def __init__(self, socket_handler, host='localhost', port=8081, - ready_lock=None): + ready_event=None): threading.Thread.__init__(self) self.socket_handler = socket_handler self.host = host - self.port = port - self.ready_lock = ready_lock + self.ready_event = ready_event def _start_server(self): sock = socket.socket() - sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) - sock.bind((self.host, self.port)) + if sys.platform != 'win32': + sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + sock.bind((self.host, 0)) + self.port = sock.getsockname()[1] # Once listen() returns, the server socket is ready sock.listen(1) - if self.ready_lock: - self.ready_lock.release() + if self.ready_event: + self.ready_event.set() self.socket_handler(sock) sock.close() @@ -67,34 +71,44 @@ class SocketServerThread(threading.Thread): class TornadoServerThread(threading.Thread): - def __init__(self, host='localhost', port=8081, scheme='http', certs=None): + app = tornado.wsgi.WSGIContainer(TestingApp()) + + def __init__(self, host='localhost', scheme='http', certs=None, + ready_event=None): threading.Thread.__init__(self) self.host = host - self.port = port self.scheme = scheme self.certs = certs + self.ready_event = ready_event def _start_server(self): - container = tornado.wsgi.WSGIContainer(TestingApp()) - if self.scheme == 'https': - http_server = tornado.httpserver.HTTPServer(container, + http_server = tornado.httpserver.HTTPServer(self.app, ssl_options=self.certs) else: - http_server = tornado.httpserver.HTTPServer(container) + http_server = tornado.httpserver.HTTPServer(self.app) - http_server.listen(self.port, address=self.host) + family = socket.AF_INET6 if ':' in self.host else socket.AF_INET + sock, = netutil.bind_sockets(None, address=self.host, family=family) + self.port = sock.getsockname()[1] + http_server.add_sockets([sock]) return http_server def run(self): - self.server = self._start_server() self.ioloop = tornado.ioloop.IOLoop.instance() + 
self.server = self._start_server() + if self.ready_event: + self.ready_event.set() self.ioloop.start() def stop(self): - self.server.stop() - self.ioloop.stop() + self.ioloop.add_callback(self.server.stop) + self.ioloop.add_callback(self.ioloop.stop) + + +class ProxyServerThread(TornadoServerThread): + app = tornado.web.Application([(r'.*', ProxyHandler)]) if __name__ == '__main__': diff --git a/dummyserver/testcase.py b/dummyserver/testcase.py index 73b8f2f..a2a1da1 100644 --- a/dummyserver/testcase.py +++ b/dummyserver/testcase.py @@ -1,14 +1,15 @@ import unittest - -from threading import Lock +import socket +import threading +from nose.plugins.skip import SkipTest from dummyserver.server import ( TornadoServerThread, SocketServerThread, DEFAULT_CERTS, + ProxyServerThread, ) - -# TODO: Change ports to auto-allocated? +has_ipv6 = hasattr(socket, 'has_ipv6') class SocketDummyServerTestCase(unittest.TestCase): @@ -18,19 +19,16 @@ class SocketDummyServerTestCase(unittest.TestCase): """ scheme = 'http' host = 'localhost' - port = 18080 @classmethod def _start_server(cls, socket_handler): - ready_lock = Lock() - ready_lock.acquire() + ready_event = threading.Event() cls.server_thread = SocketServerThread(socket_handler=socket_handler, - ready_lock=ready_lock, - host=cls.host, port=cls.port) + ready_event=ready_event, + host=cls.host) cls.server_thread.start() - - # Lock gets released by thread above - ready_lock.acquire() + ready_event.wait() + cls.port = cls.server_thread.port @classmethod def tearDownClass(cls): @@ -41,20 +39,19 @@ class SocketDummyServerTestCase(unittest.TestCase): class HTTPDummyServerTestCase(unittest.TestCase): scheme = 'http' host = 'localhost' - host_alt = '127.0.0.1' # Some tests need two hosts - port = 18081 + host_alt = '127.0.0.1' # Some tests need two hosts certs = DEFAULT_CERTS @classmethod def _start_server(cls): - cls.server_thread = TornadoServerThread(host=cls.host, port=cls.port, + ready_event = threading.Event() + cls.server_thread 
= TornadoServerThread(host=cls.host, scheme=cls.scheme, - certs=cls.certs) + certs=cls.certs, + ready_event=ready_event) cls.server_thread.start() - - # TODO: Loop-check here instead - import time - time.sleep(0.1) + ready_event.wait() + cls.port = cls.server_thread.port @classmethod def _stop_server(cls): @@ -73,5 +70,52 @@ class HTTPDummyServerTestCase(unittest.TestCase): class HTTPSDummyServerTestCase(HTTPDummyServerTestCase): scheme = 'https' host = 'localhost' - port = 18082 certs = DEFAULT_CERTS + + +class HTTPDummyProxyTestCase(unittest.TestCase): + + http_host = 'localhost' + http_host_alt = '127.0.0.1' + + https_host = 'localhost' + https_host_alt = '127.0.0.1' + https_certs = DEFAULT_CERTS + + proxy_host = 'localhost' + proxy_host_alt = '127.0.0.1' + + @classmethod + def setUpClass(cls): + cls.http_thread = TornadoServerThread(host=cls.http_host, + scheme='http') + cls.http_thread._start_server() + cls.http_port = cls.http_thread.port + + cls.https_thread = TornadoServerThread( + host=cls.https_host, scheme='https', certs=cls.https_certs) + cls.https_thread._start_server() + cls.https_port = cls.https_thread.port + + ready_event = threading.Event() + cls.proxy_thread = ProxyServerThread( + host=cls.proxy_host, ready_event=ready_event) + cls.proxy_thread.start() + ready_event.wait() + cls.proxy_port = cls.proxy_thread.port + + @classmethod + def tearDownClass(cls): + cls.proxy_thread.stop() + cls.proxy_thread.join() + + +class IPv6HTTPDummyServerTestCase(HTTPDummyServerTestCase): + host = '::1' + + @classmethod + def setUpClass(cls): + if not has_ipv6: + raise SkipTest('IPv6 not available') + else: + super(IPv6HTTPDummyServerTestCase, cls).setUpClass() diff --git a/setup.cfg b/setup.cfg index 8f6983c..8f1fee7 100644 --- a/setup.cfg +++ b/setup.cfg @@ -3,6 +3,7 @@ logging-clear-handlers = true with-coverage = true cover-package = urllib3 cover-min-percentage = 100 +cover-erase = true [egg_info] tag_build = diff --git a/test-requirements.txt 
b/test-requirements.txt index 226c13d..f7c3a50 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -1,3 +1,4 @@ -nose +nose==1.3 +mock==1.0.1 tornado==2.4.1 -coverage +coverage==3.6 diff --git a/test/__init__.py b/test/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test/benchmark.py b/test/benchmark.py new file mode 100644 index 0000000..e7049c4 --- /dev/null +++ b/test/benchmark.py @@ -0,0 +1,77 @@ +#!/usr/bin/env python + +""" +Really simple rudimentary benchmark to compare ConnectionPool versus standard +urllib to demonstrate the usefulness of connection re-using. +""" +from __future__ import print_function + +import sys +import time +import urllib + +sys.path.append('../') +import urllib3 + + +# URLs to download. Doesn't matter as long as they're from the same host, so we +# can take advantage of connection re-using. +TO_DOWNLOAD = [ + 'http://code.google.com/apis/apps/', + 'http://code.google.com/apis/base/', + 'http://code.google.com/apis/blogger/', + 'http://code.google.com/apis/calendar/', + 'http://code.google.com/apis/codesearch/', + 'http://code.google.com/apis/contact/', + 'http://code.google.com/apis/books/', + 'http://code.google.com/apis/documents/', + 'http://code.google.com/apis/finance/', + 'http://code.google.com/apis/health/', + 'http://code.google.com/apis/notebook/', + 'http://code.google.com/apis/picasaweb/', + 'http://code.google.com/apis/spreadsheets/', + 'http://code.google.com/apis/webmastertools/', + 'http://code.google.com/apis/youtube/', +] + + +def urllib_get(url_list): + assert url_list + for url in url_list: + now = time.time() + r = urllib.urlopen(url) + elapsed = time.time() - now + print("Got in %0.3f: %s" % (elapsed, url)) + + +def pool_get(url_list): + assert url_list + pool = urllib3.connection_from_url(url_list[0]) + for url in url_list: + now = time.time() + r = pool.get_url(url) + elapsed = time.time() - now + print("Got in %0.3fs: %s" % (elapsed, url)) + + +if __name__ == '__main__': + 
print("Running pool_get ...") + now = time.time() + pool_get(TO_DOWNLOAD) + pool_elapsed = time.time() - now + + print("Running urllib_get ...") + now = time.time() + urllib_get(TO_DOWNLOAD) + urllib_elapsed = time.time() - now + + print("Completed pool_get in %0.3fs" % pool_elapsed) + print("Completed urllib_get in %0.3fs" % urllib_elapsed) + + +""" +Example results: + +Completed pool_get in 1.163s +Completed urllib_get in 2.318s +""" diff --git a/test/test_connectionpool.py b/test/test_connectionpool.py index a7e104a..ac1768e 100644 --- a/test/test_connectionpool.py +++ b/test/test_connectionpool.py @@ -1,6 +1,11 @@ import unittest -from urllib3.connectionpool import connection_from_url, HTTPConnectionPool +from urllib3.connectionpool import ( + connection_from_url, + HTTPConnection, + HTTPConnectionPool, +) +from urllib3.util import Timeout from urllib3.packages.ssl_match_hostname import CertificateError from urllib3.exceptions import ( ClosedPoolError, @@ -8,7 +13,7 @@ from urllib3.exceptions import ( HostChangedError, MaxRetryError, SSLError, - TimeoutError, + ReadTimeoutError, ) from socket import error as SocketError, timeout as SocketTimeout @@ -52,6 +57,7 @@ class TestConnectionPool(unittest.TestCase): c = connection_from_url(a) self.assertFalse(c.is_same_host(b), "%s =? %s" % (a, b)) + def test_max_connections(self): pool = HTTPConnectionPool(host='localhost', maxsize=1, block=True) @@ -108,6 +114,7 @@ class TestConnectionPool(unittest.TestCase): "Max retries exceeded with url: Test. 
" "(Caused by {0}: Test)".format(str(err.__class__))) + def test_pool_size(self): POOL_SIZE = 1 pool = HTTPConnectionPool(host='localhost', maxsize=POOL_SIZE, block=True) @@ -122,8 +129,8 @@ class TestConnectionPool(unittest.TestCase): self.assertEqual(pool.pool.qsize(), POOL_SIZE) #make sure that all of the exceptions return the connection to the pool - _test(Empty, TimeoutError) - _test(SocketTimeout, TimeoutError) + _test(Empty, ReadTimeoutError) + _test(SocketTimeout, ReadTimeoutError) _test(BaseSSLError, SSLError) _test(CertificateError, SSLError) @@ -166,6 +173,20 @@ class TestConnectionPool(unittest.TestCase): self.assertRaises(Empty, old_pool_queue.get, block=False) + def test_pool_timeouts(self): + pool = HTTPConnectionPool(host='localhost') + conn = pool._new_conn() + self.assertEqual(conn.__class__, HTTPConnection) + self.assertEqual(pool.timeout.__class__, Timeout) + self.assertEqual(pool.timeout._read, Timeout.DEFAULT_TIMEOUT) + self.assertEqual(pool.timeout._connect, Timeout.DEFAULT_TIMEOUT) + self.assertEqual(pool.timeout.total, None) + + pool = HTTPConnectionPool(host='localhost', timeout=3) + self.assertEqual(pool.timeout._read, 3) + self.assertEqual(pool.timeout._connect, 3) + self.assertEqual(pool.timeout.total, None) + if __name__ == '__main__': unittest.main() diff --git a/test/test_exceptions.py b/test/test_exceptions.py index 3e02ca6..e20649b 100644 --- a/test/test_exceptions.py +++ b/test/test_exceptions.py @@ -1,19 +1,35 @@ import unittest import pickle -from urllib3.exceptions import HTTPError, MaxRetryError, LocationParseError +from urllib3.exceptions import (HTTPError, MaxRetryError, LocationParseError, + ClosedPoolError, EmptyPoolError, + HostChangedError, ReadTimeoutError, + ConnectTimeoutError) from urllib3.connectionpool import HTTPConnectionPool class TestPickle(unittest.TestCase): + def cycle(self, item): + return pickle.loads(pickle.dumps(item)) + def test_exceptions(self): - assert pickle.dumps(HTTPError(None)) - assert 
pickle.dumps(MaxRetryError(None, None, None)) - assert pickle.dumps(LocationParseError(None)) + assert self.cycle(HTTPError(None)) + assert self.cycle(MaxRetryError(None, None, None)) + assert self.cycle(LocationParseError(None)) + assert self.cycle(ConnectTimeoutError(None)) def test_exceptions_with_objects(self): - assert pickle.dumps(HTTPError('foo')) - assert pickle.dumps(MaxRetryError(HTTPConnectionPool('localhost'), '/', None)) - assert pickle.dumps(LocationParseError('fake location')) + assert self.cycle(HTTPError('foo')) + assert self.cycle(MaxRetryError(HTTPConnectionPool('localhost'), + '/', None)) + assert self.cycle(LocationParseError('fake location')) + assert self.cycle(ClosedPoolError(HTTPConnectionPool('localhost'), + None)) + assert self.cycle(EmptyPoolError(HTTPConnectionPool('localhost'), + None)) + assert self.cycle(HostChangedError(HTTPConnectionPool('localhost'), + '/', None)) + assert self.cycle(ReadTimeoutError(HTTPConnectionPool('localhost'), + '/', None)) diff --git a/test/test_fields.py b/test/test_fields.py new file mode 100644 index 0000000..888c2d5 --- /dev/null +++ b/test/test_fields.py @@ -0,0 +1,44 @@ +import unittest + +from urllib3.fields import guess_content_type, RequestField +from urllib3.packages.six import b, u + + +class TestRequestField(unittest.TestCase): + + def test_guess_content_type(self): + self.assertEqual(guess_content_type('image.jpg'), 'image/jpeg') + self.assertEqual(guess_content_type('notsure'), 'application/octet-stream') + self.assertEqual(guess_content_type(None), 'application/octet-stream') + + def test_create(self): + simple_field = RequestField('somename', 'data') + self.assertEqual(simple_field.render_headers(), '\r\n') + filename_field = RequestField('somename', 'data', filename='somefile.txt') + self.assertEqual(filename_field.render_headers(), '\r\n') + headers_field = RequestField('somename', 'data', headers={'Content-Length': 4}) + self.assertEqual(headers_field.render_headers(), + 'Content-Length: 
4\r\n' + '\r\n') + + def test_make_multipart(self): + field = RequestField('somename', 'data') + field.make_multipart(content_type='image/jpg', content_location='/test') + self.assertEqual(field.render_headers(), + 'Content-Disposition: form-data; name="somename"\r\n' + 'Content-Type: image/jpg\r\n' + 'Content-Location: /test\r\n' + '\r\n') + + def test_render_parts(self): + field = RequestField('somename', 'data') + parts = field._render_parts({'name': 'value', 'filename': 'value'}) + self.assertTrue('name="value"' in parts) + self.assertTrue('filename="value"' in parts) + parts = field._render_parts([('name', 'value'), ('filename', 'value')]) + self.assertEqual(parts, 'name="value"; filename="value"') + + def test_render_part(self): + field = RequestField('somename', 'data') + param = field._render_part('filename', u('n\u00e4me')) + self.assertEqual(param, "filename*=utf-8''n%C3%A4me") diff --git a/test/test_filepost.py b/test/test_filepost.py index 70ab100..ca33d61 100644 --- a/test/test_filepost.py +++ b/test/test_filepost.py @@ -1,6 +1,7 @@ import unittest from urllib3.filepost import encode_multipart_formdata, iter_fields +from urllib3.fields import RequestField from urllib3.packages.six import b, u @@ -117,3 +118,16 @@ class TestMultipartEncoding(unittest.TestCase): self.assertEqual(content_type, 'multipart/form-data; boundary=' + str(BOUNDARY)) + + def test_request_fields(self): + fields = [RequestField('k', b'v', filename='somefile.txt', headers={'Content-Type': 'image/jpeg'})] + + encoded, content_type = encode_multipart_formdata(fields, boundary=BOUNDARY) + + self.assertEquals(encoded, + b'--' + b(BOUNDARY) + b'\r\n' + b'Content-Type: image/jpeg\r\n' + b'\r\n' + b'v\r\n' + b'--' + b(BOUNDARY) + b'--\r\n' + ) diff --git a/test/test_proxymanager.py b/test/test_proxymanager.py index 64c86e8..e7b5c48 100644 --- a/test/test_proxymanager.py +++ b/test/test_proxymanager.py @@ -5,7 +5,7 @@ from urllib3.poolmanager import ProxyManager class 
TestProxyManager(unittest.TestCase): def test_proxy_headers(self): - p = ProxyManager(None) + p = ProxyManager('http://something:1234') url = 'http://pypi.python.org/test' # Verify default headers @@ -23,5 +23,21 @@ class TestProxyManager(unittest.TestCase): self.assertEqual(headers, provided_headers) + # Verify proxy with nonstandard port + provided_headers = {'Accept': 'application/json'} + expected_headers = provided_headers.copy() + expected_headers.update({'Host': 'pypi.python.org:8080'}) + url_with_port = 'http://pypi.python.org:8080/test' + headers = p._set_proxy_headers(url_with_port, provided_headers) + + self.assertEqual(headers, expected_headers) + + def test_default_port(self): + p = ProxyManager('http://something') + self.assertEqual(p.proxy.port, 80) + p = ProxyManager('https://something') + self.assertEqual(p.proxy.port, 443) + + if __name__ == '__main__': unittest.main() diff --git a/test/test_response.py b/test/test_response.py index 199e379..90d34eb 100644 --- a/test/test_response.py +++ b/test/test_response.py @@ -1,6 +1,6 @@ import unittest -from io import BytesIO +from io import BytesIO, BufferedReader from urllib3.response import HTTPResponse from urllib3.exceptions import DecodeError @@ -112,5 +112,138 @@ class TestResponse(unittest.TestCase): self.assertEqual(r.read(1), b'f') self.assertEqual(r.read(2), b'oo') + def test_io(self): + import socket + try: + from http.client import HTTPResponse as OldHTTPResponse + except: + from httplib import HTTPResponse as OldHTTPResponse + + fp = BytesIO(b'foo') + resp = HTTPResponse(fp, preload_content=False) + + self.assertEqual(resp.closed, False) + self.assertEqual(resp.readable(), True) + self.assertEqual(resp.writable(), False) + self.assertRaises(IOError, resp.fileno) + + resp.close() + self.assertEqual(resp.closed, True) + + # Try closing with an `httplib.HTTPResponse`, because it has an + # `isclosed` method. 
+ hlr = OldHTTPResponse(socket.socket()) + resp2 = HTTPResponse(hlr, preload_content=False) + self.assertEqual(resp2.closed, False) + resp2.close() + self.assertEqual(resp2.closed, True) + + #also try when only data is present. + resp3 = HTTPResponse('foodata') + self.assertRaises(IOError, resp3.fileno) + + resp3._fp = 2 + # A corner case where _fp is present but doesn't have `closed`, + # `isclosed`, or `fileno`. Unlikely, but possible. + self.assertEqual(resp3.closed, True) + self.assertRaises(IOError, resp3.fileno) + + def test_io_bufferedreader(self): + fp = BytesIO(b'foo') + resp = HTTPResponse(fp, preload_content=False) + br = BufferedReader(resp) + + self.assertEqual(br.read(), b'foo') + + br.close() + self.assertEqual(resp.closed, True) + + def test_streaming(self): + fp = BytesIO(b'foo') + resp = HTTPResponse(fp, preload_content=False) + stream = resp.stream(2, decode_content=False) + + self.assertEqual(next(stream), b'fo') + self.assertEqual(next(stream), b'o') + self.assertRaises(StopIteration, next, stream) + + def test_gzipped_streaming(self): + import zlib + compress = zlib.compressobj(6, zlib.DEFLATED, 16 + zlib.MAX_WBITS) + data = compress.compress(b'foo') + data += compress.flush() + + fp = BytesIO(data) + resp = HTTPResponse(fp, headers={'content-encoding': 'gzip'}, + preload_content=False) + stream = resp.stream(2) + + self.assertEqual(next(stream), b'f') + self.assertEqual(next(stream), b'oo') + self.assertRaises(StopIteration, next, stream) + + def test_deflate_streaming(self): + import zlib + data = zlib.compress(b'foo') + + fp = BytesIO(data) + resp = HTTPResponse(fp, headers={'content-encoding': 'deflate'}, + preload_content=False) + stream = resp.stream(2) + + self.assertEqual(next(stream), b'f') + self.assertEqual(next(stream), b'oo') + self.assertRaises(StopIteration, next, stream) + + def test_deflate2_streaming(self): + import zlib + compress = zlib.compressobj(6, zlib.DEFLATED, -zlib.MAX_WBITS) + data = compress.compress(b'foo') + data 
+= compress.flush() + + fp = BytesIO(data) + resp = HTTPResponse(fp, headers={'content-encoding': 'deflate'}, + preload_content=False) + stream = resp.stream(2) + + self.assertEqual(next(stream), b'f') + self.assertEqual(next(stream), b'oo') + self.assertRaises(StopIteration, next, stream) + + def test_empty_stream(self): + fp = BytesIO(b'') + resp = HTTPResponse(fp, preload_content=False) + stream = resp.stream(2, decode_content=False) + + self.assertRaises(StopIteration, next, stream) + + def test_mock_httpresponse_stream(self): + # Mock out a HTTP Request that does enough to make it through urllib3's + # read() and close() calls, and also exhausts and underlying file + # object. + class MockHTTPRequest(object): + self.fp = None + + def read(self, amt): + data = self.fp.read(amt) + if not data: + self.fp = None + + return data + + def close(self): + self.fp = None + + bio = BytesIO(b'foo') + fp = MockHTTPRequest() + fp.fp = bio + resp = HTTPResponse(fp, preload_content=False) + stream = resp.stream(2) + + self.assertEqual(next(stream), b'fo') + self.assertEqual(next(stream), b'o') + self.assertRaises(StopIteration, next, stream) + + if __name__ == '__main__': unittest.main() diff --git a/test/test_util.py b/test/test_util.py index a989da6..b465fef 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -1,10 +1,23 @@ -import unittest import logging +import unittest + +from mock import patch from urllib3 import add_stderr_logger -from urllib3.util import get_host, make_headers, split_first, parse_url, Url -from urllib3.exceptions import LocationParseError +from urllib3.util import ( + get_host, + make_headers, + split_first, + parse_url, + Timeout, + Url, +) +from urllib3.exceptions import LocationParseError, TimeoutStateError +# This number represents a time in seconds, it doesn't mean anything in +# isolation. 
Setting to a high-ish value to avoid conflicts with the smaller +# numbers used for timeouts +TIMEOUT_EPOCH = 1000 class TestUtil(unittest.TestCase): def test_get_host(self): @@ -34,20 +47,20 @@ class TestUtil(unittest.TestCase): 'http://173.194.35.7:80/test': ('http', '173.194.35.7', 80), # IPv6 - '[2a00:1450:4001:c01::67]': ('http', '2a00:1450:4001:c01::67', None), - 'http://[2a00:1450:4001:c01::67]': ('http', '2a00:1450:4001:c01::67', None), - 'http://[2a00:1450:4001:c01::67]/test': ('http', '2a00:1450:4001:c01::67', None), - 'http://[2a00:1450:4001:c01::67]:80': ('http', '2a00:1450:4001:c01::67', 80), - 'http://[2a00:1450:4001:c01::67]:80/test': ('http', '2a00:1450:4001:c01::67', 80), + '[2a00:1450:4001:c01::67]': ('http', '[2a00:1450:4001:c01::67]', None), + 'http://[2a00:1450:4001:c01::67]': ('http', '[2a00:1450:4001:c01::67]', None), + 'http://[2a00:1450:4001:c01::67]/test': ('http', '[2a00:1450:4001:c01::67]', None), + 'http://[2a00:1450:4001:c01::67]:80': ('http', '[2a00:1450:4001:c01::67]', 80), + 'http://[2a00:1450:4001:c01::67]:80/test': ('http', '[2a00:1450:4001:c01::67]', 80), # More IPv6 from http://www.ietf.org/rfc/rfc2732.txt - 'http://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:8000/index.html': ('http', 'FEDC:BA98:7654:3210:FEDC:BA98:7654:3210', 8000), - 'http://[1080:0:0:0:8:800:200C:417A]/index.html': ('http', '1080:0:0:0:8:800:200C:417A', None), - 'http://[3ffe:2a00:100:7031::1]': ('http', '3ffe:2a00:100:7031::1', None), - 'http://[1080::8:800:200C:417A]/foo': ('http', '1080::8:800:200C:417A', None), - 'http://[::192.9.5.5]/ipng': ('http', '::192.9.5.5', None), - 'http://[::FFFF:129.144.52.38]:42/index.html': ('http', '::FFFF:129.144.52.38', 42), - 'http://[2010:836B:4179::836B:4179]': ('http', '2010:836B:4179::836B:4179', None), + 'http://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:8000/index.html': ('http', '[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]', 8000), + 'http://[1080:0:0:0:8:800:200C:417A]/index.html': ('http', 
'[1080:0:0:0:8:800:200C:417A]', None), + 'http://[3ffe:2a00:100:7031::1]': ('http', '[3ffe:2a00:100:7031::1]', None), + 'http://[1080::8:800:200C:417A]/foo': ('http', '[1080::8:800:200C:417A]', None), + 'http://[::192.9.5.5]/ipng': ('http', '[::192.9.5.5]', None), + 'http://[::FFFF:129.144.52.38]:42/index.html': ('http', '[::FFFF:129.144.52.38]', 42), + 'http://[2010:836B:4179::836B:4179]': ('http', '[2010:836B:4179::836B:4179]', None), } for url, expected_host in url_host_map.items(): returned_host = get_host(url) @@ -57,6 +70,8 @@ class TestUtil(unittest.TestCase): # TODO: Add more tests invalid_host = [ 'http://google.com:foo', + 'http://::1/', + 'http://::1:80/', ] for location in invalid_host: @@ -83,6 +98,9 @@ class TestUtil(unittest.TestCase): returned_url = parse_url(url) self.assertEquals(returned_url, expected_url) + def test_parse_url_invalid_IPv6(self): + self.assertRaises(ValueError, parse_url, '[::1') + def test_request_uri(self): url_host_map = { 'http://google.com/mail': '/mail', @@ -99,6 +117,17 @@ class TestUtil(unittest.TestCase): returned_url = parse_url(url) self.assertEquals(returned_url.request_uri, expected_request_uri) + def test_netloc(self): + url_netloc_map = { + 'http://google.com/mail': 'google.com', + 'http://google.com:80/mail': 'google.com:80', + 'google.com/foobar': 'google.com', + 'google.com:12345': 'google.com:12345', + } + + for url, expected_netloc in url_netloc_map.items(): + self.assertEquals(parse_url(url).netloc, expected_netloc) + def test_make_headers(self): self.assertEqual( make_headers(accept_encoding=True), @@ -148,3 +177,99 @@ class TestUtil(unittest.TestCase): logger.debug('Testing add_stderr_logger') logger.removeHandler(handler) + + def _make_time_pass(self, seconds, timeout, time_mock): + """ Make some time pass for the timeout object """ + time_mock.return_value = TIMEOUT_EPOCH + timeout.start_connect() + time_mock.return_value = TIMEOUT_EPOCH + seconds + return timeout + + def test_invalid_timeouts(self): + 
try: + Timeout(total=-1) + self.fail("negative value should throw exception") + except ValueError as e: + self.assertTrue('less than' in str(e)) + try: + Timeout(connect=2, total=-1) + self.fail("negative value should throw exception") + except ValueError as e: + self.assertTrue('less than' in str(e)) + + try: + Timeout(read=-1) + self.fail("negative value should throw exception") + except ValueError as e: + self.assertTrue('less than' in str(e)) + + # Booleans are allowed also by socket.settimeout and converted to the + # equivalent float (1.0 for True, 0.0 for False) + Timeout(connect=False, read=True) + + try: + Timeout(read="foo") + self.fail("string value should not be allowed") + except ValueError as e: + self.assertTrue('int or float' in str(e)) + + + @patch('urllib3.util.current_time') + def test_timeout(self, current_time): + timeout = Timeout(total=3) + + # make 'no time' elapse + timeout = self._make_time_pass(seconds=0, timeout=timeout, + time_mock=current_time) + self.assertEqual(timeout.read_timeout, 3) + self.assertEqual(timeout.connect_timeout, 3) + + timeout = Timeout(total=3, connect=2) + self.assertEqual(timeout.connect_timeout, 2) + + timeout = Timeout() + self.assertEqual(timeout.connect_timeout, Timeout.DEFAULT_TIMEOUT) + + # Connect takes 5 seconds, leaving 5 seconds for read + timeout = Timeout(total=10, read=7) + timeout = self._make_time_pass(seconds=5, timeout=timeout, + time_mock=current_time) + self.assertEqual(timeout.read_timeout, 5) + + # Connect takes 2 seconds, read timeout still 7 seconds + timeout = Timeout(total=10, read=7) + timeout = self._make_time_pass(seconds=2, timeout=timeout, + time_mock=current_time) + self.assertEqual(timeout.read_timeout, 7) + + timeout = Timeout(total=10, read=7) + self.assertEqual(timeout.read_timeout, 7) + + timeout = Timeout(total=None, read=None, connect=None) + self.assertEqual(timeout.connect_timeout, None) + self.assertEqual(timeout.read_timeout, None) + self.assertEqual(timeout.total, None) + 
+ + def test_timeout_str(self): + timeout = Timeout(connect=1, read=2, total=3) + self.assertEqual(str(timeout), "Timeout(connect=1, read=2, total=3)") + timeout = Timeout(connect=1, read=None, total=3) + self.assertEqual(str(timeout), "Timeout(connect=1, read=None, total=3)") + + + @patch('urllib3.util.current_time') + def test_timeout_elapsed(self, current_time): + current_time.return_value = TIMEOUT_EPOCH + timeout = Timeout(total=3) + self.assertRaises(TimeoutStateError, timeout.get_connect_duration) + + timeout.start_connect() + self.assertRaises(TimeoutStateError, timeout.start_connect) + + current_time.return_value = TIMEOUT_EPOCH + 2 + self.assertEqual(timeout.get_connect_duration(), 2) + current_time.return_value = TIMEOUT_EPOCH + 37 + self.assertEqual(timeout.get_connect_duration(), 37) + + diff --git a/urllib3.egg-info/PKG-INFO b/urllib3.egg-info/PKG-INFO index 661e33b..a81ab9c 100644 --- a/urllib3.egg-info/PKG-INFO +++ b/urllib3.egg-info/PKG-INFO @@ -1,6 +1,6 @@ -Metadata-Version: 1.1 +Metadata-Version: 1.0 Name: urllib3 -Version: 1.6 +Version: 1.7.1 Summary: HTTP library with thread-safe connection pooling, file post, and more. Home-page: http://urllib3.readthedocs.org/ Author: Andrey Petrov @@ -25,7 +25,7 @@ Description: ======= - Supports gzip and deflate decoding. - Thread-safe and sanity-safe. - Works with AppEngine, gevent, and eventlib. - - Tested on Python 2.6+ and Python 3.3+, 100% unit test coverage. + - Tested on Python 2.6+ and Python 3.2+, 100% unit test coverage. - Small and easy to understand codebase perfect for extending and building upon. For a more comprehensive solution, have a look at `Requests `_ which is also powered by urllib3. @@ -89,14 +89,18 @@ Description: ======= Run the tests ============= - We use some external dependencies to run the urllib3 test suite. 
Easiest way to - run the tests is thusly from the urllib3 source root: :: + We use some external dependencies, multiple interpreters and code coverage + analysis while running test suite. Easiest way to run the tests is thusly the + ``tox`` utility: :: - $ pip install -r test-requirements.txt - $ nosetests - ..................................................... + $ tox + # [..] + py26: commands succeeded + py27: commands succeeded + py32: commands succeeded + py33: commands succeeded - Success! You could also ``pip install coverage`` to get code coverage reporting. + Note that code coverage less than 100% is regarded as a failing run. Contributing @@ -117,6 +121,54 @@ Description: ======= Changes ======= + 1.7.1 (2013-09-25) + ++++++++++++++++++ + + * Added granular timeout support with new `urllib3.util.Timeout` class. + (Issue #231) + + * Fixed Python 3.4 support. (Issue #238) + + + 1.7 (2013-08-14) + ++++++++++++++++ + + * More exceptions are now pickle-able, with tests. (Issue #174) + + * Fixed redirecting with relative URLs in Location header. (Issue #178) + + * Support for relative urls in ``Location: ...`` header. (Issue #179) + + * ``urllib3.response.HTTPResponse`` now inherits from ``io.IOBase`` for bonus + file-like functionality. (Issue #187) + + * Passing ``assert_hostname=False`` when creating a HTTPSConnectionPool will + skip hostname verification for SSL connections. (Issue #194) + + * New method ``urllib3.response.HTTPResponse.stream(...)`` which acts as a + generator wrapped around ``.read(...)``. (Issue #198) + + * IPv6 url parsing enforces brackets around the hostname. (Issue #199) + + * Fixed thread race condition in + ``urllib3.poolmanager.PoolManager.connection_from_host(...)`` (Issue #204) + + * ``ProxyManager`` requests now include non-default port in ``Host: ...`` + header. (Issue #217) + + * Added HTTPS proxy support in ``ProxyManager``. 
(Issue #170 #139) + + * New ``RequestField`` object can be passed to the ``fields=...`` param which + can specify headers. (Issue #220) + + * Raise ``urllib3.exceptions.ProxyError`` when connecting to proxy fails. + (Issue #221) + + * Use international headers when posting file names. (Issue #119) + + * Improved IPv6 support. (Issue #203) + + 1.6 (2013-04-25) ++++++++++++++++ diff --git a/urllib3.egg-info/SOURCES.txt b/urllib3.egg-info/SOURCES.txt index 69ec475..32759d9 100644 --- a/urllib3.egg-info/SOURCES.txt +++ b/urllib3.egg-info/SOURCES.txt @@ -8,11 +8,15 @@ setup.py test-requirements.txt dummyserver/__init__.py dummyserver/handlers.py +dummyserver/proxy.py dummyserver/server.py dummyserver/testcase.py +test/__init__.py +test/benchmark.py test/test_collections.py test/test_connectionpool.py test/test_exceptions.py +test/test_fields.py test/test_filepost.py test/test_poolmanager.py test/test_proxymanager.py @@ -22,6 +26,7 @@ urllib3/__init__.py urllib3/_collections.py urllib3/connectionpool.py urllib3/exceptions.py +urllib3/fields.py urllib3/filepost.py urllib3/poolmanager.py urllib3/request.py diff --git a/urllib3/__init__.py b/urllib3/__init__.py index ebd43b3..eed7006 100644 --- a/urllib3/__init__.py +++ b/urllib3/__init__.py @@ -10,7 +10,7 @@ urllib3 - Thread-safe connection pooling and re-using. __author__ = 'Andrey Petrov (andrey.petrov@shazow.net)' __license__ = 'MIT' -__version__ = '1.6' +__version__ = '1.7.1' from .connectionpool import ( @@ -23,7 +23,7 @@ from . import exceptions from .filepost import encode_multipart_formdata from .poolmanager import PoolManager, ProxyManager, proxy_from_url from .response import HTTPResponse -from .util import make_headers, get_host +from .util import make_headers, get_host, Timeout # Set default logging handler to avoid "No handler found" warnings. 
diff --git a/urllib3/_collections.py b/urllib3/_collections.py index b35a736..282b8d5 100644 --- a/urllib3/_collections.py +++ b/urllib3/_collections.py @@ -5,7 +5,7 @@ # the MIT License: http://www.opensource.org/licenses/mit-license.php from collections import MutableMapping -from threading import Lock +from threading import RLock try: # Python 2.7+ from collections import OrderedDict @@ -40,18 +40,18 @@ class RecentlyUsedContainer(MutableMapping): self.dispose_func = dispose_func self._container = self.ContainerCls() - self._lock = Lock() + self.lock = RLock() def __getitem__(self, key): # Re-insert the item, moving it to the end of the eviction line. - with self._lock: + with self.lock: item = self._container.pop(key) self._container[key] = item return item def __setitem__(self, key, value): evicted_value = _Null - with self._lock: + with self.lock: # Possibly evict the existing value of 'key' evicted_value = self._container.get(key, _Null) self._container[key] = value @@ -65,21 +65,21 @@ class RecentlyUsedContainer(MutableMapping): self.dispose_func(evicted_value) def __delitem__(self, key): - with self._lock: + with self.lock: value = self._container.pop(key) if self.dispose_func: self.dispose_func(value) def __len__(self): - with self._lock: + with self.lock: return len(self._container) def __iter__(self): raise NotImplementedError('Iteration over this class is unlikely to be threadsafe.') def clear(self): - with self._lock: + with self.lock: # Copy pointers to all values, then wipe the mapping # under Python 2, this copies the list of values twice :-| values = list(self._container.values()) @@ -90,5 +90,5 @@ class RecentlyUsedContainer(MutableMapping): self.dispose_func(value) def keys(self): - with self._lock: + with self.lock: return self._container.keys() diff --git a/urllib3/connectionpool.py b/urllib3/connectionpool.py index 73fa9ca..691d4e2 100644 --- a/urllib3/connectionpool.py +++ b/urllib3/connectionpool.py @@ -4,12 +4,11 @@ # This module is part 
of urllib3 and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php -import logging -import socket import errno +import logging from socket import error as SocketError, timeout as SocketTimeout -from .util import resolve_cert_reqs, resolve_ssl_version, assert_fingerprint +import socket try: # Python 3 from http.client import HTTPConnection, HTTPException @@ -22,11 +21,15 @@ try: # Python 3 from queue import LifoQueue, Empty, Full except ImportError: from Queue import LifoQueue, Empty, Full + import Queue as _ # Platform-specific: Windows try: # Compiled with SSL? HTTPSConnection = object - BaseSSLError = None + + class BaseSSLError(BaseException): + pass + ssl = None try: # Python 3 @@ -41,21 +44,29 @@ except (ImportError, AttributeError): # Platform-specific: No SSL. pass -from .request import RequestMethods -from .response import HTTPResponse -from .util import get_host, is_connection_dropped, ssl_wrap_socket from .exceptions import ( ClosedPoolError, + ConnectTimeoutError, EmptyPoolError, HostChangedError, MaxRetryError, SSLError, - TimeoutError, + ReadTimeoutError, + ProxyError, ) - -from .packages.ssl_match_hostname import match_hostname, CertificateError +from .packages.ssl_match_hostname import CertificateError, match_hostname from .packages import six - +from .request import RequestMethods +from .response import HTTPResponse +from .util import ( + assert_fingerprint, + get_host, + is_connection_dropped, + resolve_cert_reqs, + resolve_ssl_version, + ssl_wrap_socket, + Timeout, +) xrange = six.moves.xrange @@ -93,11 +104,24 @@ class VerifiedHTTPSConnection(HTTPSConnection): def connect(self): # Add certificate verification - sock = socket.create_connection((self.host, self.port), self.timeout) + try: + sock = socket.create_connection( + address=(self.host, self.port), + timeout=self.timeout) + except SocketTimeout: + raise ConnectTimeoutError( + self, "Connection to %s timed out. 
(connect timeout=%s)" % + (self.host, self.timeout)) resolved_cert_reqs = resolve_cert_reqs(self.cert_reqs) resolved_ssl_version = resolve_ssl_version(self.ssl_version) + if self._tunnel_host: + self.sock = sock + # Calls self._set_hostport(), so self.host is + # self._tunnel_host below. + self._tunnel() + # Wrap socket using verification with the root certs in # trusted_root_certs self.sock = ssl_wrap_socket(sock, self.key_file, self.cert_file, @@ -110,10 +134,11 @@ class VerifiedHTTPSConnection(HTTPSConnection): if self.assert_fingerprint: assert_fingerprint(self.sock.getpeercert(binary_form=True), self.assert_fingerprint) - else: + elif self.assert_hostname is not False: match_hostname(self.sock.getpeercert(), self.assert_hostname or self.host) + ## Pool objects class ConnectionPool(object): @@ -126,6 +151,9 @@ class ConnectionPool(object): QueueCls = LifoQueue def __init__(self, host, port=None): + # httplib doesn't like it when we include brackets in ipv6 addresses + host = host.strip('[]') + self.host = host self.port = port @@ -133,6 +161,8 @@ class ConnectionPool(object): return '%s(host=%r, port=%r)' % (type(self).__name__, self.host, self.port) +# This is taken from http://hg.python.org/cpython/file/7aaba721ebc0/Lib/socket.py#l252 +_blocking_errnos = set([errno.EAGAIN, errno.EWOULDBLOCK]) class HTTPConnectionPool(ConnectionPool, RequestMethods): """ @@ -151,9 +181,15 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): as a valid HTTP/1.0 or 1.1 status line, passed into :class:`httplib.HTTPConnection`. + .. note:: + Only works in Python 2. This parameter is ignored in Python 3. + :param timeout: - Socket timeout for each individual connection, can be a float. None - disables timeout. + Socket timeout in seconds for each individual connection. This can + be a float or integer, which sets the timeout for the HTTP request, + or an instance of :class:`urllib3.util.Timeout` which gives you more + fine-grained control over request timeouts. 
After the constructor has + been parsed, this is always a `urllib3.util.Timeout` object. :param maxsize: Number of connections to save that can be reused. More than 1 is useful @@ -171,20 +207,39 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): :param headers: Headers to include with all requests, unless other headers are given explicitly. + + :param _proxy: + Parsed proxy URL, should not be used directly, instead, see + :class:`urllib3.connectionpool.ProxyManager`" + + :param _proxy_headers: + A dictionary with proxy headers, should not be used directly, + instead, see :class:`urllib3.connectionpool.ProxyManager`" """ scheme = 'http' - def __init__(self, host, port=None, strict=False, timeout=None, maxsize=1, - block=False, headers=None): + def __init__(self, host, port=None, strict=False, + timeout=Timeout.DEFAULT_TIMEOUT, maxsize=1, block=False, + headers=None, _proxy=None, _proxy_headers=None): ConnectionPool.__init__(self, host, port) RequestMethods.__init__(self, headers) self.strict = strict + + # This is for backwards compatibility and can be removed once a timeout + # can only be set to a Timeout object + if not isinstance(timeout, Timeout): + timeout = Timeout.from_float(timeout) + self.timeout = timeout + self.pool = self.QueueCls(maxsize) self.block = block + self.proxy = _proxy + self.proxy_headers = _proxy_headers or {} + # Fill the queue up so that doing get() on it will block properly for _ in xrange(maxsize): self.pool.put(None) @@ -200,9 +255,14 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): self.num_connections += 1 log.info("Starting new HTTP connection (%d): %s" % (self.num_connections, self.host)) - return HTTPConnection(host=self.host, - port=self.port, - strict=self.strict) + extra_params = {} + if not six.PY3: # Python 2 + extra_params['strict'] = self.strict + + return HTTPConnection(host=self.host, port=self.port, + timeout=self.timeout.connect_timeout, + **extra_params) + def _get_conn(self, timeout=None): """ 
@@ -263,31 +323,89 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): % self.host) # Connection never got put back into the pool, close it. - conn.close() + if conn: + conn.close() + + def _get_timeout(self, timeout): + """ Helper that always returns a :class:`urllib3.util.Timeout` """ + if timeout is _Default: + return self.timeout.clone() + + if isinstance(timeout, Timeout): + return timeout.clone() + else: + # User passed us an int/float. This is for backwards compatibility, + # can be removed later + return Timeout.from_float(timeout) def _make_request(self, conn, method, url, timeout=_Default, **httplib_request_kw): """ Perform a request on a given httplib connection object taken from our pool. + + :param conn: + a connection from one of our connection pools + + :param timeout: + Socket timeout in seconds for the request. This can be a + float or integer, which will set the same timeout value for + the socket connect and the socket read, or an instance of + :class:`urllib3.util.Timeout`, which gives you more fine-grained + control over your timeouts. """ self.num_requests += 1 - if timeout is _Default: - timeout = self.timeout - - conn.timeout = timeout # This only does anything in Py26+ - conn.request(method, url, **httplib_request_kw) + timeout_obj = self._get_timeout(timeout) - # Set timeout - sock = getattr(conn, 'sock', False) # AppEngine doesn't have sock attr. - if sock: - sock.settimeout(timeout) + try: + timeout_obj.start_connect() + conn.timeout = timeout_obj.connect_timeout + # conn.request() calls httplib.*.request, not the method in + # request.py. It also calls makefile (recv) on the socket + conn.request(method, url, **httplib_request_kw) + except SocketTimeout: + raise ConnectTimeoutError( + self, "Connection to %s timed out. 
(connect timeout=%s)" % + (self.host, timeout_obj.connect_timeout)) + + # Reset the timeout for the recv() on the socket + read_timeout = timeout_obj.read_timeout + log.debug("Setting read timeout to %s" % read_timeout) + # App Engine doesn't have a sock attr + if hasattr(conn, 'sock') and \ + read_timeout is not None and \ + read_timeout is not Timeout.DEFAULT_TIMEOUT: + # In Python 3 socket.py will catch EAGAIN and return None when you + # try and read into the file pointer created by http.client, which + # instead raises a BadStatusLine exception. Instead of catching + # the exception and assuming all BadStatusLine exceptions are read + # timeouts, check for a zero timeout before making the request. + if read_timeout == 0: + raise ReadTimeoutError( + self, url, + "Read timed out. (read timeout=%s)" % read_timeout) + conn.sock.settimeout(read_timeout) + + # Receive the response from the server + try: + try: # Python 2.7+, use buffering of HTTP responses + httplib_response = conn.getresponse(buffering=True) + except TypeError: # Python 2.6 and older + httplib_response = conn.getresponse() + except SocketTimeout: + raise ReadTimeoutError( + self, url, "Read timed out. (read timeout=%s)" % read_timeout) + + except SocketError as e: # Platform-specific: Python 2 + # See the above comment about EAGAIN in Python 3. In Python 2 we + # have to specifically catch it and throw the timeout error + if e.errno in _blocking_errnos: + raise ReadTimeoutError( + self, url, + "Read timed out. (read timeout=%s)" % read_timeout) + raise - try: # Python 2.7+, use buffering of HTTP responses - httplib_response = conn.getresponse(buffering=True) - except TypeError: # Python 2.6 and older - httplib_response = conn.getresponse() # AppEngine doesn't have a version attr. 
http_version = getattr(conn, '_http_vsn_str', 'HTTP/?') @@ -367,7 +485,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): :param redirect: If True, automatically handle redirects (status codes 301, 302, - 303, 307). Each redirect counts as a retry. + 303, 307, 308). Each redirect counts as a retry. :param assert_same_host: If ``True``, will make sure that the host of the pool requests is @@ -375,7 +493,9 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): use the pool on an HTTP proxy and request foreign hosts. :param timeout: - If specified, overrides the default timeout for this one request. + If specified, overrides the default timeout for this one + request. It may be a float (in seconds) or an instance of + :class:`urllib3.util.Timeout`. :param pool_timeout: If set and the pool is set to block=True, then this method will @@ -402,18 +522,11 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): if retries < 0: raise MaxRetryError(self, url) - if timeout is _Default: - timeout = self.timeout - if release_conn is None: release_conn = response_kw.get('preload_content', True) # Check host if assert_same_host and not self.is_same_host(url): - host = "%s://%s" % (self.scheme, self.host) - if self.port: - host = "%s:%d" % (host, self.port) - raise HostChangedError(self, url, retries - 1) conn = None @@ -444,18 +557,20 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): # ``response.release_conn()`` is called (implicitly by # ``response.read()``) - except Empty as e: + except Empty: # Timed out by queue - raise TimeoutError(self, "Request timed out. (pool_timeout=%s)" % - pool_timeout) + raise ReadTimeoutError( + self, url, "Read timed out, no pool connections are available.") - except SocketTimeout as e: + except SocketTimeout: # Timed out by socket - raise TimeoutError(self, "Request timed out. 
(timeout=%s)" % - timeout) + raise ReadTimeoutError(self, url, "Read timed out.") except BaseSSLError as e: # SSL certificate error + if 'timed out' in str(e) or \ + 'did not complete (read)' in str(e): # Platform-specific: Python 2.6 + raise ReadTimeoutError(self, url, "Read timed out.") raise SSLError(e) except CertificateError as e: @@ -463,6 +578,10 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): raise SSLError(e) except (HTTPException, SocketError) as e: + if isinstance(e, SocketError) and self.proxy is not None: + raise ProxyError('Cannot connect to proxy. ' + 'Socket error: %s.' % e) + # Connection broken, discard. It will be replaced next _get_conn(). conn = None # This is necessary so we can access e below @@ -511,6 +630,7 @@ class HTTPSConnectionPool(HTTPConnectionPool): :class:`.VerifiedHTTPSConnection` uses one of ``assert_fingerprint``, ``assert_hostname`` and ``host`` in this order to verify connections. + If ``assert_hostname`` is False, no verification is done. 
The ``key_file``, ``cert_file``, ``cert_reqs``, ``ca_certs`` and ``ssl_version`` are only used if :mod:`ssl` is available and are fed into @@ -523,13 +643,13 @@ class HTTPSConnectionPool(HTTPConnectionPool): def __init__(self, host, port=None, strict=False, timeout=None, maxsize=1, block=False, headers=None, + _proxy=None, _proxy_headers=None, key_file=None, cert_file=None, cert_reqs=None, ca_certs=None, ssl_version=None, assert_hostname=None, assert_fingerprint=None): - HTTPConnectionPool.__init__(self, host, port, - strict, timeout, maxsize, - block, headers) + HTTPConnectionPool.__init__(self, host, port, strict, timeout, maxsize, + block, headers, _proxy, _proxy_headers) self.key_file = key_file self.cert_file = cert_file self.cert_reqs = cert_reqs @@ -538,6 +658,34 @@ class HTTPSConnectionPool(HTTPConnectionPool): self.assert_hostname = assert_hostname self.assert_fingerprint = assert_fingerprint + def _prepare_conn(self, connection): + """ + Prepare the ``connection`` for :meth:`urllib3.util.ssl_wrap_socket` + and establish the tunnel if proxy is used. + """ + + if isinstance(connection, VerifiedHTTPSConnection): + connection.set_cert(key_file=self.key_file, + cert_file=self.cert_file, + cert_reqs=self.cert_reqs, + ca_certs=self.ca_certs, + assert_hostname=self.assert_hostname, + assert_fingerprint=self.assert_fingerprint) + connection.ssl_version = self.ssl_version + + if self.proxy is not None: + # Python 2.7+ + try: + set_tunnel = connection.set_tunnel + except AttributeError: # Platform-specific: Python 2.6 + set_tunnel = connection._set_tunnel + set_tunnel(self.host, self.port, self.proxy_headers) + # Establish tunnel connection early, because otherwise httplib + # would improperly set Host: header to proxy's IP:port. + connection.connect() + + return connection + def _new_conn(self): """ Return a fresh :class:`httplib.HTTPSConnection`. 
@@ -546,26 +694,28 @@ class HTTPSConnectionPool(HTTPConnectionPool): log.info("Starting new HTTPS connection (%d): %s" % (self.num_connections, self.host)) + actual_host = self.host + actual_port = self.port + if self.proxy is not None: + actual_host = self.proxy.host + actual_port = self.proxy.port + if not ssl: # Platform-specific: Python compiled without +ssl if not HTTPSConnection or HTTPSConnection is object: raise SSLError("Can't connect to HTTPS URL because the SSL " "module is not available.") - - return HTTPSConnection(host=self.host, - port=self.port, - strict=self.strict) - - connection = VerifiedHTTPSConnection(host=self.host, - port=self.port, - strict=self.strict) - connection.set_cert(key_file=self.key_file, cert_file=self.cert_file, - cert_reqs=self.cert_reqs, ca_certs=self.ca_certs, - assert_hostname=self.assert_hostname, - assert_fingerprint=self.assert_fingerprint) - - connection.ssl_version = self.ssl_version - - return connection + connection_class = HTTPSConnection + else: + connection_class = VerifiedHTTPSConnection + + extra_params = {} + if not six.PY3: # Python 2 + extra_params['strict'] = self.strict + connection = connection_class(host=actual_host, port=actual_port, + timeout=self.timeout.connect_timeout, + **extra_params) + + return self._prepare_conn(connection) def connection_from_url(url, **kw): diff --git a/urllib3/contrib/ntlmpool.py b/urllib3/contrib/ntlmpool.py index 277ee0b..b8cd933 100644 --- a/urllib3/contrib/ntlmpool.py +++ b/urllib3/contrib/ntlmpool.py @@ -33,7 +33,7 @@ class NTLMConnectionPool(HTTPSConnectionPool): def __init__(self, user, pw, authurl, *args, **kwargs): """ authurl is a random URL on the server that is protected by NTLM. - user is the Windows user, probably in the DOMAIN\username format. + user is the Windows user, probably in the DOMAIN\\username format. pw is the password for the user. 
""" super(NTLMConnectionPool, self).__init__(*args, **kwargs) diff --git a/urllib3/contrib/pyopenssl.py b/urllib3/contrib/pyopenssl.py index 5c4c6d8..d43bcd6 100644 --- a/urllib3/contrib/pyopenssl.py +++ b/urllib3/contrib/pyopenssl.py @@ -20,13 +20,13 @@ Now you can use :mod:`urllib3` as you normally would, and it will support SNI when the required modules are installed. ''' -from ndg.httpsclient.ssl_peer_verification import (ServerSSLCertVerification, - SUBJ_ALT_NAME_SUPPORT) +from ndg.httpsclient.ssl_peer_verification import SUBJ_ALT_NAME_SUPPORT from ndg.httpsclient.subj_alt_name import SubjectAltName import OpenSSL.SSL from pyasn1.codec.der import decoder as der_decoder from socket import _fileobject import ssl +from cStringIO import StringIO from .. import connectionpool from .. import util @@ -99,6 +99,172 @@ def get_subj_alt_name(peer_cert): return dns_name +class fileobject(_fileobject): + + def read(self, size=-1): + # Use max, disallow tiny reads in a loop as they are very inefficient. + # We never leave read() with any leftover data from a new recv() call + # in our internal buffer. + rbufsize = max(self._rbufsize, self.default_bufsize) + # Our use of StringIO rather than lists of string objects returned by + # recv() minimizes memory usage and fragmentation that occurs when + # rbufsize is large compared to the typical return value of recv(). + buf = self._rbuf + buf.seek(0, 2) # seek end + if size < 0: + # Read until EOF + self._rbuf = StringIO() # reset _rbuf. we consume it via buf. + while True: + try: + data = self._sock.recv(rbufsize) + except OpenSSL.SSL.WantReadError: + continue + if not data: + break + buf.write(data) + return buf.getvalue() + else: + # Read until size bytes or EOF seen, whichever comes first + buf_len = buf.tell() + if buf_len >= size: + # Already have size bytes in our buffer? Extract and return. 
+ buf.seek(0) + rv = buf.read(size) + self._rbuf = StringIO() + self._rbuf.write(buf.read()) + return rv + + self._rbuf = StringIO() # reset _rbuf. we consume it via buf. + while True: + left = size - buf_len + # recv() will malloc the amount of memory given as its + # parameter even though it often returns much less data + # than that. The returned data string is short lived + # as we copy it into a StringIO and free it. This avoids + # fragmentation issues on many platforms. + try: + data = self._sock.recv(left) + except OpenSSL.SSL.WantReadError: + continue + if not data: + break + n = len(data) + if n == size and not buf_len: + # Shortcut. Avoid buffer data copies when: + # - We have no data in our buffer. + # AND + # - Our call to recv returned exactly the + # number of bytes we were asked to read. + return data + if n == left: + buf.write(data) + del data # explicit free + break + assert n <= left, "recv(%d) returned %d bytes" % (left, n) + buf.write(data) + buf_len += n + del data # explicit free + #assert buf_len == buf.tell() + return buf.getvalue() + + def readline(self, size=-1): + buf = self._rbuf + buf.seek(0, 2) # seek end + if buf.tell() > 0: + # check if we already have it in our buffer + buf.seek(0) + bline = buf.readline(size) + if bline.endswith('\n') or len(bline) == size: + self._rbuf = StringIO() + self._rbuf.write(buf.read()) + return bline + del bline + if size < 0: + # Read until \n or EOF, whichever comes first + if self._rbufsize <= 1: + # Speed up unbuffered case + buf.seek(0) + buffers = [buf.read()] + self._rbuf = StringIO() # reset _rbuf. we consume it via buf. + data = None + recv = self._sock.recv + while True: + try: + while data != "\n": + data = recv(1) + if not data: + break + buffers.append(data) + except OpenSSL.SSL.WantReadError: + continue + break + return "".join(buffers) + + buf.seek(0, 2) # seek end + self._rbuf = StringIO() # reset _rbuf. we consume it via buf. 
+ while True: + try: + data = self._sock.recv(self._rbufsize) + except OpenSSL.SSL.WantReadError: + continue + if not data: + break + nl = data.find('\n') + if nl >= 0: + nl += 1 + buf.write(data[:nl]) + self._rbuf.write(data[nl:]) + del data + break + buf.write(data) + return buf.getvalue() + else: + # Read until size bytes or \n or EOF seen, whichever comes first + buf.seek(0, 2) # seek end + buf_len = buf.tell() + if buf_len >= size: + buf.seek(0) + rv = buf.read(size) + self._rbuf = StringIO() + self._rbuf.write(buf.read()) + return rv + self._rbuf = StringIO() # reset _rbuf. we consume it via buf. + while True: + try: + data = self._sock.recv(self._rbufsize) + except OpenSSL.SSL.WantReadError: + continue + if not data: + break + left = size - buf_len + # did we just receive a newline? + nl = data.find('\n', 0, left) + if nl >= 0: + nl += 1 + # save the excess data to _rbuf + self._rbuf.write(data[nl:]) + if buf_len: + buf.write(data[:nl]) + break + else: + # Shortcut. Avoid data copy through buf when returning + # a substring of our first recv(). + return data[:nl] + n = len(data) + if n == size and not buf_len: + # Shortcut. Avoid data copy through buf when + # returning exactly all of our first recv(). 
+ return data + if n >= left: + buf.write(data[:left]) + self._rbuf.write(data[left:]) + break + buf.write(data) + buf_len += n + #assert buf_len == buf.tell() + return buf.getvalue() + + class WrappedSocket(object): '''API-compatibility wrapper for Python OpenSSL's Connection-class.''' @@ -106,8 +272,11 @@ class WrappedSocket(object): self.connection = connection self.socket = socket + def fileno(self): + return self.socket.fileno() + def makefile(self, mode, bufsize=-1): - return _fileobject(self.connection, mode, bufsize) + return fileobject(self.connection, mode, bufsize) def settimeout(self, timeout): return self.socket.settimeout(timeout) @@ -115,10 +284,14 @@ class WrappedSocket(object): def sendall(self, data): return self.connection.sendall(data) + def close(self): + return self.connection.shutdown() + def getpeercert(self, binary_form=False): x509 = self.connection.get_peer_certificate() + if not x509: - raise ssl.SSLError('') + return x509 if binary_form: return OpenSSL.crypto.dump_certificate( @@ -159,9 +332,13 @@ def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, cnx = OpenSSL.SSL.Connection(ctx, sock) cnx.set_tlsext_host_name(server_hostname) cnx.set_connect_state() - try: - cnx.do_handshake() - except OpenSSL.SSL.Error as e: - raise ssl.SSLError('bad handshake', e) + while True: + try: + cnx.do_handshake() + except OpenSSL.SSL.WantReadError: + continue + except OpenSSL.SSL.Error as e: + raise ssl.SSLError('bad handshake', e) + break return WrappedSocket(cnx, sock) diff --git a/urllib3/exceptions.py b/urllib3/exceptions.py index 8dd76af..98ef9ab 100644 --- a/urllib3/exceptions.py +++ b/urllib3/exceptions.py @@ -20,7 +20,18 @@ class PoolError(HTTPError): def __reduce__(self): # For pickling purposes. - return self.__class__, (None, self.url) + return self.__class__, (None, None) + + +class RequestError(PoolError): + "Base exception for PoolErrors that have associated URLs." 
+ def __init__(self, pool, url, message): + self.url = url + PoolError.__init__(self, pool, message) + + def __reduce__(self): + # For pickling purposes. + return self.__class__, (None, self.url, None) class SSLError(HTTPError): @@ -28,6 +39,11 @@ class SSLError(HTTPError): pass +class ProxyError(HTTPError): + "Raised when the connection to a proxy fails." + pass + + class DecodeError(HTTPError): "Raised when automatic decoding based on Content-Type fails." pass @@ -35,7 +51,7 @@ class DecodeError(HTTPError): ## Leaf Exceptions -class MaxRetryError(PoolError): +class MaxRetryError(RequestError): "Raised when the maximum number of retries is exceeded." def __init__(self, pool, url, reason=None): @@ -47,23 +63,41 @@ class MaxRetryError(PoolError): else: message += " (Caused by redirect)" - PoolError.__init__(self, pool, message) - self.url = url + RequestError.__init__(self, pool, url, message) -class HostChangedError(PoolError): +class HostChangedError(RequestError): "Raised when an existing pool gets a request for a foreign host." def __init__(self, pool, url, retries=3): message = "Tried to open a foreign host with url: %s" % url - PoolError.__init__(self, pool, message) - - self.url = url + RequestError.__init__(self, pool, url, message) self.retries = retries -class TimeoutError(PoolError): - "Raised when a socket timeout occurs." +class TimeoutStateError(HTTPError): + """ Raised when passing an invalid state to a timeout """ + pass + + +class TimeoutError(HTTPError): + """ Raised when a socket timeout error occurs. + + Catching this error will catch both :exc:`ReadTimeoutErrors + ` and :exc:`ConnectTimeoutErrors `. 
+ """ + pass + + +class ReadTimeoutError(TimeoutError, RequestError): + "Raised when a socket timeout occurs while receiving data from a server" + pass + + +# This timeout error does not have a URL attached and needs to inherit from the +# base HTTPError +class ConnectTimeoutError(TimeoutError): + "Raised when a socket timeout occurs while connecting to a server" pass diff --git a/urllib3/fields.py b/urllib3/fields.py new file mode 100644 index 0000000..ed01765 --- /dev/null +++ b/urllib3/fields.py @@ -0,0 +1,177 @@ +# urllib3/fields.py +# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# +# This module is part of urllib3 and is released under +# the MIT License: http://www.opensource.org/licenses/mit-license.php + +import email.utils +import mimetypes + +from .packages import six + + +def guess_content_type(filename, default='application/octet-stream'): + """ + Guess the "Content-Type" of a file. + + :param filename: + The filename to guess the "Content-Type" of using :mod:`mimetimes`. + :param default: + If no "Content-Type" can be guessed, default to `default`. + """ + if filename: + return mimetypes.guess_type(filename)[0] or default + return default + + +def format_header_param(name, value): + """ + Helper function to format and quote a single header parameter. + + Particularly useful for header parameters which might contain + non-ASCII values, like file names. This follows RFC 2231, as + suggested by RFC 2388 Section 4.4. + + :param name: + The name of the parameter, a string expected to be ASCII only. + :param value: + The value of the parameter, provided as a unicode string. 
+ """ + if not any(ch in value for ch in '"\\\r\n'): + result = '%s="%s"' % (name, value) + try: + result.encode('ascii') + except UnicodeEncodeError: + pass + else: + return result + if not six.PY3: # Python 2: + value = value.encode('utf-8') + value = email.utils.encode_rfc2231(value, 'utf-8') + value = '%s*=%s' % (name, value) + return value + + +class RequestField(object): + """ + A data container for request body parameters. + + :param name: + The name of this request field. + :param data: + The data/value body. + :param filename: + An optional filename of the request field. + :param headers: + An optional dict-like object of headers to initially use for the field. + """ + def __init__(self, name, data, filename=None, headers=None): + self._name = name + self._filename = filename + self.data = data + self.headers = {} + if headers: + self.headers = dict(headers) + + @classmethod + def from_tuples(cls, fieldname, value): + """ + A :class:`~urllib3.fields.RequestField` factory from old-style tuple parameters. + + Supports constructing :class:`~urllib3.fields.RequestField` from parameter + of key/value strings AND key/filetuple. A filetuple is a (filename, data, MIME type) + tuple where the MIME type is optional. For example: :: + + 'foo': 'bar', + 'fakefile': ('foofile.txt', 'contents of foofile'), + 'realfile': ('barfile.txt', open('realfile').read()), + 'typedfile': ('bazfile.bin', open('bazfile').read(), 'image/jpeg'), + 'nonamefile': 'contents of nonamefile field', + + Field names and filenames must be unicode. 
+ """ + if isinstance(value, tuple): + if len(value) == 3: + filename, data, content_type = value + else: + filename, data = value + content_type = guess_content_type(filename) + else: + filename = None + content_type = None + data = value + + request_param = cls(fieldname, data, filename=filename) + request_param.make_multipart(content_type=content_type) + + return request_param + + def _render_part(self, name, value): + """ + Overridable helper function to format a single header parameter. + + :param name: + The name of the parameter, a string expected to be ASCII only. + :param value: + The value of the parameter, provided as a unicode string. + """ + return format_header_param(name, value) + + def _render_parts(self, header_parts): + """ + Helper function to format and quote a single header. + + Useful for single headers that are composed of multiple items. E.g., + 'Content-Disposition' fields. + + :param header_parts: + A sequence of (k, v) typles or a :class:`dict` of (k, v) to format as + `k1="v1"; k2="v2"; ...`. + """ + parts = [] + iterable = header_parts + if isinstance(header_parts, dict): + iterable = header_parts.items() + + for name, value in iterable: + if value: + parts.append(self._render_part(name, value)) + + return '; '.join(parts) + + def render_headers(self): + """ + Renders the headers for this request field. + """ + lines = [] + + sort_keys = ['Content-Disposition', 'Content-Type', 'Content-Location'] + for sort_key in sort_keys: + if self.headers.get(sort_key, False): + lines.append('%s: %s' % (sort_key, self.headers[sort_key])) + + for header_name, header_value in self.headers.items(): + if header_name not in sort_keys: + if header_value: + lines.append('%s: %s' % (header_name, header_value)) + + lines.append('\r\n') + return '\r\n'.join(lines) + + def make_multipart(self, content_disposition=None, content_type=None, content_location=None): + """ + Makes this request field into a multipart request field. 
+ + This method overrides "Content-Disposition", "Content-Type" and + "Content-Location" headers to the request parameter. + + :param content_type: + The 'Content-Type' of the request body. + :param content_location: + The 'Content-Location' of the request body. + + """ + self.headers['Content-Disposition'] = content_disposition or 'form-data' + self.headers['Content-Disposition'] += '; '.join(['', self._render_parts((('name', self._name), ('filename', self._filename)))]) + self.headers['Content-Type'] = content_type + self.headers['Content-Location'] = content_location diff --git a/urllib3/filepost.py b/urllib3/filepost.py index 526a740..4575582 100644 --- a/urllib3/filepost.py +++ b/urllib3/filepost.py @@ -12,6 +12,7 @@ from io import BytesIO from .packages import six from .packages.six import b +from .fields import RequestField writer = codecs.lookup('utf-8')[3] @@ -23,15 +24,38 @@ def choose_boundary(): return uuid4().hex -def get_content_type(filename): - return mimetypes.guess_type(filename)[0] or 'application/octet-stream' +def iter_field_objects(fields): + """ + Iterate over fields. + + Supports list of (k, v) tuples and dicts, and lists of + :class:`~urllib3.fields.RequestField`. + + """ + if isinstance(fields, dict): + i = six.iteritems(fields) + else: + i = iter(fields) + + for field in i: + if isinstance(field, RequestField): + yield field + else: + yield RequestField.from_tuples(*field) def iter_fields(fields): """ Iterate over fields. + .. deprecated :: + + The addition of `~urllib3.fields.RequestField` makes this function + obsolete. Instead, use :func:`iter_field_objects`, which returns + `~urllib3.fields.RequestField` objects, instead. + Supports list of (k, v) tuples and dicts. + """ if isinstance(fields, dict): return ((k, v) for k, v in six.iteritems(fields)) @@ -44,15 +68,7 @@ def encode_multipart_formdata(fields, boundary=None): Encode a dictionary of ``fields`` using the multipart/form-data MIME format. 
:param fields: - Dictionary of fields or list of (key, value) or (key, value, MIME type) - field tuples. The key is treated as the field name, and the value as - the body of the form-data bytes. If the value is a tuple of two - elements, then the first element is treated as the filename of the - form-data section and a suitable MIME type is guessed based on the - filename. If the value is a tuple of three elements, then the third - element is treated as an explicit MIME type of the form-data section. - - Field names and filenames must be unicode. + Dictionary of fields or list of (key, :class:`~urllib3.fields.RequestField`). :param boundary: If not specified, then a random boundary will be generated using @@ -62,24 +78,11 @@ def encode_multipart_formdata(fields, boundary=None): if boundary is None: boundary = choose_boundary() - for fieldname, value in iter_fields(fields): + for field in iter_field_objects(fields): body.write(b('--%s\r\n' % (boundary))) - if isinstance(value, tuple): - if len(value) == 3: - filename, data, content_type = value - else: - filename, data = value - content_type = get_content_type(filename) - writer(body).write('Content-Disposition: form-data; name="%s"; ' - 'filename="%s"\r\n' % (fieldname, filename)) - body.write(b('Content-Type: %s\r\n\r\n' % - (content_type,))) - else: - data = value - writer(body).write('Content-Disposition: form-data; name="%s"\r\n' - % (fieldname)) - body.write(b'\r\n') + writer(body).write(field.render_headers()) + data = field.data if isinstance(data, int): data = str(data) # Backwards compatibility diff --git a/urllib3/packages/ssl_match_hostname/__init__.py b/urllib3/packages/ssl_match_hostname/__init__.py index 9560b04..2d61ac2 100644 --- a/urllib3/packages/ssl_match_hostname/__init__.py +++ b/urllib3/packages/ssl_match_hostname/__init__.py @@ -7,23 +7,60 @@ __version__ = '3.2.2' class CertificateError(ValueError): pass -def _dnsname_to_pat(dn): +def _dnsname_match(dn, hostname, max_wildcards=1): + 
"""Matching according to RFC 6125, section 6.4.3 + + http://tools.ietf.org/html/rfc6125#section-6.4.3 + """ pats = [] - for frag in dn.split(r'.'): - if frag == '*': - # When '*' is a fragment by itself, it matches a non-empty dotless - # fragment. - pats.append('[^.]+') - else: - # Otherwise, '*' matches any dotless fragment. - frag = re.escape(frag) - pats.append(frag.replace(r'\*', '[^.]*')) - return re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE) + if not dn: + return False + + parts = dn.split(r'.') + leftmost = parts[0] + + wildcards = leftmost.count('*') + if wildcards > max_wildcards: + # Issue #17980: avoid denials of service by refusing more + # than one wildcard per fragment. A survery of established + # policy among SSL implementations showed it to be a + # reasonable choice. + raise CertificateError( + "too many wildcards in certificate DNS name: " + repr(dn)) + + # speed up common case w/o wildcards + if not wildcards: + return dn.lower() == hostname.lower() + + # RFC 6125, section 6.4.3, subitem 1. + # The client SHOULD NOT attempt to match a presented identifier in which + # the wildcard character comprises a label other than the left-most label. + if leftmost == '*': + # When '*' is a fragment by itself, it matches a non-empty dotless + # fragment. + pats.append('[^.]+') + elif leftmost.startswith('xn--') or hostname.startswith('xn--'): + # RFC 6125, section 6.4.3, subitem 3. + # The client SHOULD NOT attempt to match a presented identifier + # where the wildcard character is embedded within an A-label or + # U-label of an internationalized domain name. + pats.append(re.escape(leftmost)) + else: + # Otherwise, '*' matches any dotless string, e.g. 
www* + pats.append(re.escape(leftmost).replace(r'\*', '[^.]*')) + + # add the remaining fragments, ignore any wildcards + for frag in parts[1:]: + pats.append(re.escape(frag)) + + pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE) + return pat.match(hostname) + def match_hostname(cert, hostname): """Verify that *cert* (in decoded format as returned by - SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 rules - are mostly followed, but IP addresses are not accepted for *hostname*. + SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 and RFC 6125 + rules are followed, but IP addresses are not accepted for *hostname*. CertificateError is raised on failure. On success, the function returns nothing. @@ -34,7 +71,7 @@ def match_hostname(cert, hostname): san = cert.get('subjectAltName', ()) for key, value in san: if key == 'DNS': - if _dnsname_to_pat(value).match(hostname): + if _dnsname_match(value, hostname): return dnsnames.append(value) if not dnsnames: @@ -45,7 +82,7 @@ def match_hostname(cert, hostname): # XXX according to RFC 2818, the most specific Common Name # must be used. 
if key == 'commonName': - if _dnsname_to_pat(value).match(hostname): + if _dnsname_match(value, hostname): return dnsnames.append(value) if len(dnsnames) > 1: diff --git a/urllib3/poolmanager.py b/urllib3/poolmanager.py index ce0c248..e7f8667 100644 --- a/urllib3/poolmanager.py +++ b/urllib3/poolmanager.py @@ -6,9 +6,14 @@ import logging +try: # Python 3 + from urllib.parse import urljoin +except ImportError: + from urlparse import urljoin + from ._collections import RecentlyUsedContainer from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool -from .connectionpool import connection_from_url, port_by_scheme +from .connectionpool import port_by_scheme from .request import RequestMethods from .util import parse_url @@ -55,6 +60,8 @@ class PoolManager(RequestMethods): """ + proxy = None + def __init__(self, num_pools=10, headers=None, **connection_pool_kw): RequestMethods.__init__(self, headers) self.connection_pool_kw = connection_pool_kw @@ -94,20 +101,23 @@ class PoolManager(RequestMethods): If ``port`` isn't given, it will be derived from the ``scheme`` using ``urllib3.connectionpool.port_by_scheme``. """ + scheme = scheme or 'http' + port = port or port_by_scheme.get(scheme, 80) pool_key = (scheme, host, port) - # If the scheme, host, or port doesn't match existing open connections, - # open a new ConnectionPool. - pool = self.pools.get(pool_key) - if pool: - return pool + with self.pools.lock: + # If the scheme, host, or port doesn't match existing open + # connections, open a new ConnectionPool. 
+ pool = self.pools.get(pool_key) + if pool: + return pool - # Make a fresh ConnectionPool of the desired type - pool = self._new_pool(scheme, host, port) - self.pools[pool_key] = pool + # Make a fresh ConnectionPool of the desired type + pool = self._new_pool(scheme, host, port) + self.pools[pool_key] = pool return pool def connection_from_url(self, url): @@ -139,12 +149,19 @@ class PoolManager(RequestMethods): if 'headers' not in kw: kw['headers'] = self.headers - response = conn.urlopen(method, u.request_uri, **kw) + if self.proxy is not None and u.scheme == "http": + response = conn.urlopen(method, url, **kw) + else: + response = conn.urlopen(method, u.request_uri, **kw) redirect_location = redirect and response.get_redirect_location() if not redirect_location: return response + # Support relative URLs for redirecting. + redirect_location = urljoin(url, redirect_location) + + # RFC 2616, Section 10.3.4 if response.status == 303: method = 'GET' @@ -154,15 +171,59 @@ class PoolManager(RequestMethods): return self.urlopen(method, redirect_location, **kw) -class ProxyManager(RequestMethods): +class ProxyManager(PoolManager): """ - Given a ConnectionPool to a proxy, the ProxyManager's ``urlopen`` method - will make requests to any url through the defined proxy. The ProxyManager - class will automatically set the 'Host' header if it is not provided. + Behaves just like :class:`PoolManager`, but sends all requests through + the defined proxy, using the CONNECT method for HTTPS URLs. + + :param poxy_url: + The URL of the proxy to be used. + + :param proxy_headers: + A dictionary contaning headers that will be sent to the proxy. In case + of HTTP they are being sent with each request, while in the + HTTPS/CONNECT case they are sent only once. Could be used for proxy + authentication. 
+ + Example: + >>> proxy = urllib3.ProxyManager('http://localhost:3128/') + >>> r1 = proxy.request('GET', 'http://google.com/') + >>> r2 = proxy.request('GET', 'http://httpbin.org/') + >>> len(proxy.pools) + 1 + >>> r3 = proxy.request('GET', 'https://httpbin.org/') + >>> r4 = proxy.request('GET', 'https://twitter.com/') + >>> len(proxy.pools) + 3 + """ - def __init__(self, proxy_pool): - self.proxy_pool = proxy_pool + def __init__(self, proxy_url, num_pools=10, headers=None, + proxy_headers=None, **connection_pool_kw): + + if isinstance(proxy_url, HTTPConnectionPool): + proxy_url = '%s://%s:%i' % (proxy_url.scheme, proxy_url.host, + proxy_url.port) + proxy = parse_url(proxy_url) + if not proxy.port: + port = port_by_scheme.get(proxy.scheme, 80) + proxy = proxy._replace(port=port) + self.proxy = proxy + self.proxy_headers = proxy_headers or {} + assert self.proxy.scheme in ("http", "https"), \ + 'Not supported proxy scheme %s' % self.proxy.scheme + connection_pool_kw['_proxy'] = self.proxy + connection_pool_kw['_proxy_headers'] = self.proxy_headers + super(ProxyManager, self).__init__( + num_pools, headers, **connection_pool_kw) + + def connection_from_host(self, host, port=None, scheme='http'): + if scheme == "https": + return super(ProxyManager, self).connection_from_host( + host, port, scheme) + + return super(ProxyManager, self).connection_from_host( + self.proxy.host, self.proxy.port, self.proxy.scheme) def _set_proxy_headers(self, url, headers=None): """ @@ -171,22 +232,28 @@ class ProxyManager(RequestMethods): """ headers_ = {'Accept': '*/*'} - host = parse_url(url).host - if host: - headers_['Host'] = host + netloc = parse_url(url).netloc + if netloc: + headers_['Host'] = netloc if headers: headers_.update(headers) - return headers_ - def urlopen(self, method, url, **kw): + def urlopen(self, method, url, redirect=True, **kw): "Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute." 
- kw['assert_same_host'] = False - kw['headers'] = self._set_proxy_headers(url, headers=kw.get('headers')) - return self.proxy_pool.urlopen(method, url, **kw) + u = parse_url(url) + + if u.scheme == "http": + # It's too late to set proxy headers on per-request basis for + # tunnelled HTTPS connections, should use + # constructor's proxy_headers instead. + kw['headers'] = self._set_proxy_headers(url, kw.get('headers', + self.headers)) + kw['headers'].update(self.proxy_headers) + + return super(ProxyManager, self).urlopen(method, url, redirect, **kw) -def proxy_from_url(url, **pool_kw): - proxy_pool = connection_from_url(url, **pool_kw) - return ProxyManager(proxy_pool) +def proxy_from_url(url, **kw): + return ProxyManager(proxy_url=url, **kw) diff --git a/urllib3/request.py b/urllib3/request.py index bf0256e..66a9a0e 100644 --- a/urllib3/request.py +++ b/urllib3/request.py @@ -30,7 +30,7 @@ class RequestMethods(object): in the URL (such as GET, HEAD, DELETE). :meth:`.request_encode_body` is for sending requests whose fields are - encoded in the *body* of the request using multipart or www-orm-urlencoded + encoded in the *body* of the request using multipart or www-form-urlencoded (such as for POST, PUT, PATCH). :meth:`.request` is for making any kind of request, it will look up the diff --git a/urllib3/response.py b/urllib3/response.py index 1685760..4efff5a 100644 --- a/urllib3/response.py +++ b/urllib3/response.py @@ -7,9 +7,11 @@ import logging import zlib +import io from .exceptions import DecodeError from .packages.six import string_types as basestring, binary_type +from .util import is_fp_closed log = logging.getLogger(__name__) @@ -48,7 +50,7 @@ def _get_decoder(mode): return DeflateDecoder() -class HTTPResponse(object): +class HTTPResponse(io.IOBase): """ HTTP Response container. 
@@ -72,6 +74,7 @@ class HTTPResponse(object): """ CONTENT_DECODERS = ['gzip', 'deflate'] + REDIRECT_STATUSES = [301, 302, 303, 307, 308] def __init__(self, body='', headers=None, status=0, version=0, reason=None, strict=0, preload_content=True, decode_content=True, @@ -105,7 +108,7 @@ class HTTPResponse(object): code and valid location. ``None`` if redirect status and no location. ``False`` if not a redirect status code. """ - if self.status in [301, 302, 303, 307]: + if self.status in self.REDIRECT_STATUSES: return self.headers.get('location') return False @@ -183,11 +186,13 @@ class HTTPResponse(object): try: if decode_content and self._decoder: data = self._decoder.decompress(data) - except (IOError, zlib.error): - raise DecodeError("Received response with content-encoding: %s, but " - "failed to decode it." % content_encoding) + except (IOError, zlib.error) as e: + raise DecodeError( + "Received response with content-encoding: %s, but " + "failed to decode it." % content_encoding, + e) - if flush_decoder and self._decoder: + if flush_decoder and decode_content and self._decoder: buf = self._decoder.decompress(binary_type()) data += buf + self._decoder.flush() @@ -200,6 +205,29 @@ class HTTPResponse(object): if self._original_response and self._original_response.isclosed(): self.release_conn() + def stream(self, amt=2**16, decode_content=None): + """ + A generator wrapper for the read() method. A call will block until + ``amt`` bytes have been read from the connection or until the + connection is closed. + + :param amt: + How much of the content to read. The generator will return up to + much data per iteration, but may return less. This is particularly + likely when using compressed data. However, the empty string will + never be returned. + + :param decode_content: + If True, will attempt to decode the body based on the + 'content-encoding' header. 
+ """ + while not is_fp_closed(self._fp): + data = self.read(amt=amt, decode_content=decode_content) + + if data: + yield data + + @classmethod def from_httplib(ResponseCls, r, **response_kw): """ @@ -239,3 +267,35 @@ class HTTPResponse(object): def getheader(self, name, default=None): return self.headers.get(name, default) + + # Overrides from io.IOBase + def close(self): + if not self.closed: + self._fp.close() + + @property + def closed(self): + if self._fp is None: + return True + elif hasattr(self._fp, 'closed'): + return self._fp.closed + elif hasattr(self._fp, 'isclosed'): # Python 2 + return self._fp.isclosed() + else: + return True + + def fileno(self): + if self._fp is None: + raise IOError("HTTPResponse has no file to get a fileno from") + elif hasattr(self._fp, "fileno"): + return self._fp.fileno() + else: + raise IOError("The file-like object this HTTPResponse is wrapped " + "around has no file descriptor") + + def flush(self): + if self._fp is not None and hasattr(self._fp, 'flush'): + return self._fp.flush() + + def readable(self): + return True diff --git a/urllib3/util.py b/urllib3/util.py index 544f9ed..266c9ed 100644 --- a/urllib3/util.py +++ b/urllib3/util.py @@ -6,10 +6,11 @@ from base64 import b64encode +from binascii import hexlify, unhexlify from collections import namedtuple -from socket import error as SocketError from hashlib import md5, sha1 -from binascii import hexlify, unhexlify +from socket import error as SocketError, _GLOBAL_DEFAULT_TIMEOUT +import time try: from select import poll, POLLIN @@ -31,9 +32,234 @@ try: # Test for SSL features except ImportError: pass - from .packages import six -from .exceptions import LocationParseError, SSLError +from .exceptions import LocationParseError, SSLError, TimeoutStateError + + +_Default = object() +# The default timeout to use for socket connections. 
This is the attribute used +# by httplib to define the default timeout + + +def current_time(): + """ + Retrieve the current time, this function is mocked out in unit testing. + """ + return time.time() + + +class Timeout(object): + """ + Utility object for storing timeout values. + + Example usage: + + .. code-block:: python + + timeout = urllib3.util.Timeout(connect=2.0, read=7.0) + pool = HTTPConnectionPool('www.google.com', 80, timeout=timeout) + pool.request(...) # Etc, etc + + :param connect: + The maximum amount of time to wait for a connection attempt to a server + to succeed. Omitting the parameter will default the connect timeout to + the system default, probably `the global default timeout in socket.py + `_. + None will set an infinite timeout for connection attempts. + + :type connect: integer, float, or None + + :param read: + The maximum amount of time to wait between consecutive + read operations for a response from the server. Omitting + the parameter will default the read timeout to the system + default, probably `the global default timeout in socket.py + `_. + None will set an infinite timeout. + + :type read: integer, float, or None + + :param total: + The maximum amount of time to wait for an HTTP request to connect and + return. This combines the connect and read timeouts into one. In the + event that both a connect timeout and a total are specified, or a read + timeout and a total are specified, the shorter timeout will be applied. + + Defaults to None. + + + :type total: integer, float, or None + + .. note:: + + Many factors can affect the total amount of time for urllib3 to return + an HTTP response. Specifically, Python's DNS resolver does not obey the + timeout specified on the socket. Other factors that can affect total + request time include high CPU load, high swap, the program running at a + low priority level, or other behaviors. 
The observed running time for + urllib3 to return a response may be greater than the value passed to + `total`. + + In addition, the read and total timeouts only measure the time between + read operations on the socket connecting the client and the server, not + the total amount of time for the request to return a complete response. + As an example, you may want a request to return within 7 seconds or + fail, so you set the ``total`` timeout to 7 seconds. If the server + sends one byte to you every 5 seconds, the request will **not** trigger + time out. This case is admittedly rare. + """ + + #: A sentinel object representing the default timeout value + DEFAULT_TIMEOUT = _GLOBAL_DEFAULT_TIMEOUT + + def __init__(self, connect=_Default, read=_Default, total=None): + self._connect = self._validate_timeout(connect, 'connect') + self._read = self._validate_timeout(read, 'read') + self.total = self._validate_timeout(total, 'total') + self._start_connect = None + + def __str__(self): + return '%s(connect=%r, read=%r, total=%r)' % ( + type(self).__name__, self._connect, self._read, self.total) + + + @classmethod + def _validate_timeout(cls, value, name): + """ Check that a timeout attribute is valid + + :param value: The timeout value to validate + :param name: The name of the timeout attribute to validate. This is used + for clear error messages + :return: the value + :raises ValueError: if the type is not an integer or a float, or if it + is a numeric value less than zero + """ + if value is _Default: + return cls.DEFAULT_TIMEOUT + + if value is None or value is cls.DEFAULT_TIMEOUT: + return value + + try: + float(value) + except (TypeError, ValueError): + raise ValueError("Timeout value %s was %s, but it must be an " + "int or float." % (name, value)) + + try: + if value < 0: + raise ValueError("Attempted to set %s timeout to %s, but the " + "timeout cannot be set to a value less " + "than 0." 
% (name, value)) + except TypeError: # Python 3 + raise ValueError("Timeout value %s was %s, but it must be an " + "int or float." % (name, value)) + + return value + + @classmethod + def from_float(cls, timeout): + """ Create a new Timeout from a legacy timeout value. + + The timeout value used by httplib.py sets the same timeout on the + connect(), and recv() socket requests. This creates a :class:`Timeout` + object that sets the individual timeouts to the ``timeout`` value passed + to this function. + + :param timeout: The legacy timeout value + :type timeout: integer, float, sentinel default object, or None + :return: a Timeout object + :rtype: :class:`Timeout` + """ + return Timeout(read=timeout, connect=timeout) + + def clone(self): + """ Create a copy of the timeout object + + Timeout properties are stored per-pool but each request needs a fresh + Timeout object to ensure each one has its own start/stop configured. + + :return: a copy of the timeout object + :rtype: :class:`Timeout` + """ + # We can't use copy.deepcopy because that will also create a new object + # for _GLOBAL_DEFAULT_TIMEOUT, which socket.py uses as a sentinel to + # detect the user default. + return Timeout(connect=self._connect, read=self._read, + total=self.total) + + def start_connect(self): + """ Start the timeout clock, used during a connect() attempt + + :raises urllib3.exceptions.TimeoutStateError: if you attempt + to start a timer that has been started already. + """ + if self._start_connect is not None: + raise TimeoutStateError("Timeout timer has already been started.") + self._start_connect = current_time() + return self._start_connect + + def get_connect_duration(self): + """ Gets the time elapsed since the call to :meth:`start_connect`. + + :return: the elapsed time + :rtype: float + :raises urllib3.exceptions.TimeoutStateError: if you attempt + to get duration for a timer that hasn't been started. 
+ """ + if self._start_connect is None: + raise TimeoutStateError("Can't get connect duration for timer " + "that has not started.") + return current_time() - self._start_connect + + @property + def connect_timeout(self): + """ Get the value to use when setting a connection timeout. + + This will be a positive float or integer, the value None + (never timeout), or the default system timeout. + + :return: the connect timeout + :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None + """ + if self.total is None: + return self._connect + + if self._connect is None or self._connect is self.DEFAULT_TIMEOUT: + return self.total + + return min(self._connect, self.total) + + @property + def read_timeout(self): + """ Get the value for the read timeout. + + This assumes some time has elapsed in the connection timeout and + computes the read timeout appropriately. + + If self.total is set, the read timeout is dependent on the amount of + time taken by the connect timeout. If the connection time has not been + established, a :exc:`~urllib3.exceptions.TimeoutStateError` will be + raised. + + :return: the value to use for the read timeout + :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None + :raises urllib3.exceptions.TimeoutStateError: If :meth:`start_connect` + has not yet been called on this object. + """ + if (self.total is not None and + self.total is not self.DEFAULT_TIMEOUT and + self._read is not None and + self._read is not self.DEFAULT_TIMEOUT): + # in case the connect timeout has not yet been established. 
+ if self._start_connect is None: + return self._read + return max(0, min(self.total - self.get_connect_duration(), + self._read)) + elif self.total is not None and self.total is not self.DEFAULT_TIMEOUT: + return max(0, self.total - self.get_connect_duration()) + else: + return self._read class Url(namedtuple('Url', ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment'])): @@ -61,6 +287,13 @@ class Url(namedtuple('Url', ['scheme', 'auth', 'host', 'port', 'path', 'query', return uri + @property + def netloc(self): + """Network location including host and port""" + if self.port: + return '%s:%d' % (self.host, self.port) + return self.host + def split_first(s, delims): """ @@ -114,7 +347,7 @@ def parse_url(url): # While this code has overlap with stdlib's urlparse, it is much # simplified for our needs and less annoying. - # Additionally, this imeplementations does silly things to be optimal + # Additionally, this implementations does silly things to be optimal # on CPython. scheme = None @@ -143,7 +376,8 @@ def parse_url(url): # IPv6 if url and url[0] == '[': - host, url = url[1:].split(']', 1) + host, url = url.split(']', 1) + host += ']' # Port if ':' in url: @@ -341,6 +575,20 @@ def assert_fingerprint(cert, fingerprint): .format(hexlify(fingerprint_bytes), hexlify(cert_digest))) +def is_fp_closed(obj): + """ + Checks whether a given file-like object is closed. + + :param obj: + The file-like object to check. + """ + if hasattr(obj, 'fp'): + # Object is a container for another file-like object that gets released + # on exhaustion (e.g. 
HTTPResponse) + return obj.fp is None + + return obj.closed + if SSLContext is not None: # Python 3.2+ def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, -- cgit v1.2.3 From 5f949ee35667a6065ab02a3e7ab8c98c9fcdcaed Mon Sep 17 00:00:00 2001 From: SVN-Git Migration Date: Thu, 8 Oct 2015 13:19:36 -0700 Subject: Imported Upstream version 1.8 --- CHANGES.rst | 54 +++- CONTRIBUTORS.txt | 21 ++ PKG-INFO | 58 ++++- dummyserver/server.py | 159 +++++++---- dummyserver/testcase.py | 63 ++--- test-requirements.txt | 2 +- test/__init__.py | 0 test/benchmark.py | 77 ------ test/test_collections.py | 57 +++- test/test_compatibility.py | 23 ++ test/test_connectionpool.py | 24 +- test/test_exceptions.py | 45 ++-- test/test_fields.py | 43 +-- test/test_filepost.py | 2 +- test/test_response.py | 113 ++++++++ test/test_util.py | 28 +- urllib3.egg-info/PKG-INFO | 58 ++++- urllib3.egg-info/SOURCES.txt | 7 +- urllib3/__init__.py | 2 +- urllib3/_collections.py | 117 ++++++++- urllib3/connection.py | 195 ++++++++++++++ urllib3/connectionpool.py | 290 +++++++++------------ urllib3/contrib/pyopenssl.py | 85 +++++- urllib3/exceptions.py | 5 + urllib3/filepost.py | 11 +- urllib3/packages/ssl_match_hostname/__init__.py | 111 +------- .../packages/ssl_match_hostname/_implementation.py | 105 ++++++++ urllib3/poolmanager.py | 11 +- urllib3/request.py | 1 - urllib3/response.py | 29 ++- urllib3/util.py | 54 ++-- 31 files changed, 1312 insertions(+), 538 deletions(-) delete mode 100644 test/__init__.py delete mode 100644 test/benchmark.py create mode 100644 test/test_compatibility.py create mode 100644 urllib3/connection.py create mode 100644 urllib3/packages/ssl_match_hostname/_implementation.py diff --git a/CHANGES.rst b/CHANGES.rst index 891fd79..ae63682 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,10 +1,62 @@ Changes ======= +1.8 (2014-03-04) +++++++++++++++++ + +* Improved url parsing in ``urllib3.util.parse_url`` (properly parse '@' in + username, and blank ports like 
'hostname:'). + +* New ``urllib3.connection`` module which contains all the HTTPConnection + objects. + +* Several ``urllib3.util.Timeout``-related fixes. Also changed constructor + signature to a more sensible order. [Backwards incompatible] + (Issues #252, #262, #263) + +* Use ``backports.ssl_match_hostname`` if it's installed. (Issue #274) + +* Added ``.tell()`` method to ``urllib3.response.HTTPResponse`` which + returns the number of bytes read so far. (Issue #277) + +* Support for platforms without threading. (Issue #289) + +* Expand default-port comparison in ``HTTPConnectionPool.is_same_host`` + to allow a pool with no specified port to be considered equal to to an + HTTP/HTTPS url with port 80/443 explicitly provided. (Issue #305) + +* Improved default SSL/TLS settings to avoid vulnerabilities. + (Issue #309) + +* Fixed ``urllib3.poolmanager.ProxyManager`` not retrying on connect errors. + (Issue #310) + +* Disable Nagle's Algorithm on the socket for non-proxies. A subset of requests + will send the entire HTTP request ~200 milliseconds faster; however, some of + the resulting TCP packets will be smaller. (Issue #254) + +* Increased maximum number of SubjectAltNames in ``urllib3.contrib.pyopenssl`` + from the default 64 to 1024 in a single certificate. (Issue #318) + +* Headers are now passed and stored as a custom + ``urllib3.collections_.HTTPHeaderDict`` object rather than a plain ``dict``. + (Issue #329, #333) + +* Headers no longer lose their case on Python 3. (Issue #236) + +* ``urllib3.contrib.pyopenssl`` now uses the operating system's default CA + certificates on inject. (Issue #332) + +* Requests with ``retries=False`` will immediately raise any exceptions without + wrapping them in ``MaxRetryError``. (Issue #348) + +* Fixed open socket leak with SSL-related failures. (Issue #344, #348) + + 1.7.1 (2013-09-25) ++++++++++++++++++ -* Added granular timeout support with new `urllib3.util.Timeout` class. 
+* Added granular timeout support with new ``urllib3.util.Timeout`` class. (Issue #231) * Fixed Python 3.4 support. (Issue #238) diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt index e1aca42..e2dba35 100644 --- a/CONTRIBUTORS.txt +++ b/CONTRIBUTORS.txt @@ -90,5 +90,26 @@ In chronological order: * Kevin Burke and Pavel Kirichenko * Support for separate connect and request timeouts +* Peter Waller + * HTTPResponse.tell() for determining amount received over the wire + +* Nipunn Koorapati + * Ignore default ports when comparing hosts for equality + +* Danilo @dbrgn + * Disabled TLS compression by default on Python 3.2+ + * Disabled TLS compression in pyopenssl contrib module + * Configurable cipher suites in pyopenssl contrib module + +* Roman Bogorodskiy + * Account retries on proxy errors + +* Nicolas Delaby + * Use the platform-specific CA certificate locations + +* Josh Schneier + * HTTPHeaderDict and associated tests and docs + * Bugfixes, docs, test coverage + * [Your name or handle] <[email or website]> * [Brief summary of your changes] diff --git a/PKG-INFO b/PKG-INFO index a81ab9c..6a4f31a 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,6 +1,6 @@ -Metadata-Version: 1.0 +Metadata-Version: 1.1 Name: urllib3 -Version: 1.7.1 +Version: 1.8 Summary: HTTP library with thread-safe connection pooling, file post, and more. Home-page: http://urllib3.readthedocs.org/ Author: Andrey Petrov @@ -121,10 +121,62 @@ Description: ======= Changes ======= + 1.8 (2014-03-04) + ++++++++++++++++ + + * Improved url parsing in ``urllib3.util.parse_url`` (properly parse '@' in + username, and blank ports like 'hostname:'). + + * New ``urllib3.connection`` module which contains all the HTTPConnection + objects. + + * Several ``urllib3.util.Timeout``-related fixes. Also changed constructor + signature to a more sensible order. [Backwards incompatible] + (Issues #252, #262, #263) + + * Use ``backports.ssl_match_hostname`` if it's installed. 
(Issue #274) + + * Added ``.tell()`` method to ``urllib3.response.HTTPResponse`` which + returns the number of bytes read so far. (Issue #277) + + * Support for platforms without threading. (Issue #289) + + * Expand default-port comparison in ``HTTPConnectionPool.is_same_host`` + to allow a pool with no specified port to be considered equal to to an + HTTP/HTTPS url with port 80/443 explicitly provided. (Issue #305) + + * Improved default SSL/TLS settings to avoid vulnerabilities. + (Issue #309) + + * Fixed ``urllib3.poolmanager.ProxyManager`` not retrying on connect errors. + (Issue #310) + + * Disable Nagle's Algorithm on the socket for non-proxies. A subset of requests + will send the entire HTTP request ~200 milliseconds faster; however, some of + the resulting TCP packets will be smaller. (Issue #254) + + * Increased maximum number of SubjectAltNames in ``urllib3.contrib.pyopenssl`` + from the default 64 to 1024 in a single certificate. (Issue #318) + + * Headers are now passed and stored as a custom + ``urllib3.collections_.HTTPHeaderDict`` object rather than a plain ``dict``. + (Issue #329, #333) + + * Headers no longer lose their case on Python 3. (Issue #236) + + * ``urllib3.contrib.pyopenssl`` now uses the operating system's default CA + certificates on inject. (Issue #332) + + * Requests with ``retries=False`` will immediately raise any exceptions without + wrapping them in ``MaxRetryError``. (Issue #348) + + * Fixed open socket leak with SSL-related failures. (Issue #344, #348) + + 1.7.1 (2013-09-25) ++++++++++++++++++ - * Added granular timeout support with new `urllib3.util.Timeout` class. + * Added granular timeout support with new ``urllib3.util.Timeout`` class. (Issue #231) * Fixed Python 3.4 support. (Issue #238) diff --git a/dummyserver/server.py b/dummyserver/server.py index f4f98a4..22de456 100755 --- a/dummyserver/server.py +++ b/dummyserver/server.py @@ -5,21 +5,21 @@ Dummy server used for unit testing. 
""" from __future__ import print_function +import errno import logging import os +import random +import string import sys import threading import socket -from tornado import netutil +from tornado.platform.auto import set_close_exec import tornado.wsgi import tornado.httpserver import tornado.ioloop import tornado.web -from dummyserver.handlers import TestingApp -from dummyserver.proxy import ProxyHandler - log = logging.getLogger(__name__) @@ -51,7 +51,7 @@ class SocketServerThread(threading.Thread): self.ready_event = ready_event def _start_server(self): - sock = socket.socket() + sock = socket.socket(socket.AF_INET6) if sys.platform != 'win32': sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) sock.bind((self.host, 0)) @@ -70,59 +70,112 @@ class SocketServerThread(threading.Thread): self.server = self._start_server() -class TornadoServerThread(threading.Thread): - app = tornado.wsgi.WSGIContainer(TestingApp()) +# FIXME: there is a pull request patching bind_sockets in Tornado directly. +# If it gets merged and released we can drop this and use +# `tornado.netutil.bind_sockets` again. +# https://github.com/facebook/tornado/pull/977 - def __init__(self, host='localhost', scheme='http', certs=None, - ready_event=None): - threading.Thread.__init__(self) +def bind_sockets(port, address=None, family=socket.AF_UNSPEC, backlog=128, + flags=None): + """Creates listening sockets bound to the given port and address. - self.host = host - self.scheme = scheme - self.certs = certs - self.ready_event = ready_event + Returns a list of socket objects (multiple sockets are returned if + the given address maps to multiple IP addresses, which is most common + for mixed IPv4 and IPv6 use). - def _start_server(self): - if self.scheme == 'https': - http_server = tornado.httpserver.HTTPServer(self.app, - ssl_options=self.certs) - else: - http_server = tornado.httpserver.HTTPServer(self.app) + Address may be either an IP address or hostname. 
If it's a hostname, + the server will listen on all IP addresses associated with the + name. Address may be an empty string or None to listen on all + available interfaces. Family may be set to either `socket.AF_INET` + or `socket.AF_INET6` to restrict to IPv4 or IPv6 addresses, otherwise + both will be used if available. - family = socket.AF_INET6 if ':' in self.host else socket.AF_INET - sock, = netutil.bind_sockets(None, address=self.host, family=family) - self.port = sock.getsockname()[1] - http_server.add_sockets([sock]) - return http_server + The ``backlog`` argument has the same meaning as for + `socket.listen() `. - def run(self): - self.ioloop = tornado.ioloop.IOLoop.instance() - self.server = self._start_server() - if self.ready_event: - self.ready_event.set() - self.ioloop.start() - - def stop(self): - self.ioloop.add_callback(self.server.stop) - self.ioloop.add_callback(self.ioloop.stop) - - -class ProxyServerThread(TornadoServerThread): - app = tornado.web.Application([(r'.*', ProxyHandler)]) - - -if __name__ == '__main__': - log.setLevel(logging.DEBUG) - log.addHandler(logging.StreamHandler(sys.stderr)) - - from urllib3 import get_host + ``flags`` is a bitmask of AI_* flags to `~socket.getaddrinfo`, like + ``socket.AI_PASSIVE | socket.AI_NUMERICHOST``. + """ + sockets = [] + if address == "": + address = None + if not socket.has_ipv6 and family == socket.AF_UNSPEC: + # Python can be compiled with --disable-ipv6, which causes + # operations on AF_INET6 sockets to fail, but does not + # automatically exclude those results from getaddrinfo + # results. 
+ # http://bugs.python.org/issue16208 + family = socket.AF_INET + if flags is None: + flags = socket.AI_PASSIVE + binded_port = None + for res in set(socket.getaddrinfo(address, port, family, + socket.SOCK_STREAM, 0, flags)): + af, socktype, proto, canonname, sockaddr = res + try: + sock = socket.socket(af, socktype, proto) + except socket.error as e: + if e.args[0] == errno.EAFNOSUPPORT: + continue + raise + set_close_exec(sock.fileno()) + if os.name != 'nt': + sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + if af == socket.AF_INET6: + # On linux, ipv6 sockets accept ipv4 too by default, + # but this makes it impossible to bind to both + # 0.0.0.0 in ipv4 and :: in ipv6. On other systems, + # separate sockets *must* be used to listen for both ipv4 + # and ipv6. For consistency, always disable ipv4 on our + # ipv6 sockets and use a separate ipv4 socket when needed. + # + # Python 2.x on windows doesn't have IPPROTO_IPV6. + if hasattr(socket, "IPPROTO_IPV6"): + sock.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 1) + + # automatic port allocation with port=None + # should bind on the same port on IPv4 and IPv6 + host, requested_port = sockaddr[:2] + if requested_port == 0 and binded_port is not None: + sockaddr = tuple([host, binded_port] + list(sockaddr[2:])) + + sock.setblocking(0) + sock.bind(sockaddr) + binded_port = sock.getsockname()[1] + sock.listen(backlog) + sockets.append(sock) + return sockets + + +def run_tornado_app(app, io_loop, certs, scheme, host): + if scheme == 'https': + http_server = tornado.httpserver.HTTPServer(app, ssl_options=certs, + io_loop=io_loop) + else: + http_server = tornado.httpserver.HTTPServer(app, io_loop=io_loop) + + sockets = bind_sockets(None, address=host) + port = sockets[0].getsockname()[1] + http_server.add_sockets(sockets) + return http_server, port + + +def run_loop_in_thread(io_loop): + t = threading.Thread(target=io_loop.start) + t.start() + return t - url = "http://localhost:8081" - if len(sys.argv) > 
1: - url = sys.argv[1] - print("Starting WSGI server at: %s" % url) +def get_unreachable_address(): + while True: + host = ''.join(random.choice(string.ascii_lowercase) + for _ in range(60)) + sockaddr = (host, 54321) - scheme, host, port = get_host(url) - t = TornadoServerThread(scheme=scheme, host=host, port=port) - t.start() + # check if we are really "lucky" and hit an actual server + try: + s = socket.create_connection(sockaddr) + except socket.error: + return sockaddr + else: + s.close() diff --git a/dummyserver/testcase.py b/dummyserver/testcase.py index a2a1da1..35769ef 100644 --- a/dummyserver/testcase.py +++ b/dummyserver/testcase.py @@ -2,14 +2,17 @@ import unittest import socket import threading from nose.plugins.skip import SkipTest +from tornado import ioloop, web, wsgi from dummyserver.server import ( - TornadoServerThread, SocketServerThread, + SocketServerThread, + run_tornado_app, + run_loop_in_thread, DEFAULT_CERTS, - ProxyServerThread, ) +from dummyserver.handlers import TestingApp +from dummyserver.proxy import ProxyHandler -has_ipv6 = hasattr(socket, 'has_ipv6') class SocketDummyServerTestCase(unittest.TestCase): @@ -33,7 +36,7 @@ class SocketDummyServerTestCase(unittest.TestCase): @classmethod def tearDownClass(cls): if hasattr(cls, 'server_thread'): - cls.server_thread.join() + cls.server_thread.join(0.1) class HTTPDummyServerTestCase(unittest.TestCase): @@ -44,18 +47,16 @@ class HTTPDummyServerTestCase(unittest.TestCase): @classmethod def _start_server(cls): - ready_event = threading.Event() - cls.server_thread = TornadoServerThread(host=cls.host, - scheme=cls.scheme, - certs=cls.certs, - ready_event=ready_event) - cls.server_thread.start() - ready_event.wait() - cls.port = cls.server_thread.port + cls.io_loop = ioloop.IOLoop() + app = wsgi.WSGIContainer(TestingApp()) + cls.server, cls.port = run_tornado_app(app, cls.io_loop, cls.certs, + cls.scheme, cls.host) + cls.server_thread = run_loop_in_thread(cls.io_loop) @classmethod def 
_stop_server(cls): - cls.server_thread.stop() + cls.io_loop.add_callback(cls.server.stop) + cls.io_loop.add_callback(cls.io_loop.stop) cls.server_thread.join() @classmethod @@ -87,27 +88,29 @@ class HTTPDummyProxyTestCase(unittest.TestCase): @classmethod def setUpClass(cls): - cls.http_thread = TornadoServerThread(host=cls.http_host, - scheme='http') - cls.http_thread._start_server() - cls.http_port = cls.http_thread.port + cls.io_loop = ioloop.IOLoop() - cls.https_thread = TornadoServerThread( - host=cls.https_host, scheme='https', certs=cls.https_certs) - cls.https_thread._start_server() - cls.https_port = cls.https_thread.port + app = wsgi.WSGIContainer(TestingApp()) + cls.http_server, cls.http_port = run_tornado_app( + app, cls.io_loop, None, 'http', cls.http_host) - ready_event = threading.Event() - cls.proxy_thread = ProxyServerThread( - host=cls.proxy_host, ready_event=ready_event) - cls.proxy_thread.start() - ready_event.wait() - cls.proxy_port = cls.proxy_thread.port + app = wsgi.WSGIContainer(TestingApp()) + cls.https_server, cls.https_port = run_tornado_app( + app, cls.io_loop, cls.https_certs, 'https', cls.http_host) + + app = web.Application([(r'.*', ProxyHandler)]) + cls.proxy_server, cls.proxy_port = run_tornado_app( + app, cls.io_loop, None, 'http', cls.proxy_host) + + cls.server_thread = run_loop_in_thread(cls.io_loop) @classmethod def tearDownClass(cls): - cls.proxy_thread.stop() - cls.proxy_thread.join() + cls.io_loop.add_callback(cls.http_server.stop) + cls.io_loop.add_callback(cls.https_server.stop) + cls.io_loop.add_callback(cls.proxy_server.stop) + cls.io_loop.add_callback(cls.io_loop.stop) + cls.server_thread.join() class IPv6HTTPDummyServerTestCase(HTTPDummyServerTestCase): @@ -115,7 +118,7 @@ class IPv6HTTPDummyServerTestCase(HTTPDummyServerTestCase): @classmethod def setUpClass(cls): - if not has_ipv6: + if not socket.has_ipv6: raise SkipTest('IPv6 not available') else: super(IPv6HTTPDummyServerTestCase, cls).setUpClass() diff --git 
a/test-requirements.txt b/test-requirements.txt index f7c3a50..02d70f4 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -1,4 +1,4 @@ nose==1.3 mock==1.0.1 -tornado==2.4.1 +tornado==3.1.1 coverage==3.6 diff --git a/test/__init__.py b/test/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/test/benchmark.py b/test/benchmark.py deleted file mode 100644 index e7049c4..0000000 --- a/test/benchmark.py +++ /dev/null @@ -1,77 +0,0 @@ -#!/usr/bin/env python - -""" -Really simple rudimentary benchmark to compare ConnectionPool versus standard -urllib to demonstrate the usefulness of connection re-using. -""" -from __future__ import print_function - -import sys -import time -import urllib - -sys.path.append('../') -import urllib3 - - -# URLs to download. Doesn't matter as long as they're from the same host, so we -# can take advantage of connection re-using. -TO_DOWNLOAD = [ - 'http://code.google.com/apis/apps/', - 'http://code.google.com/apis/base/', - 'http://code.google.com/apis/blogger/', - 'http://code.google.com/apis/calendar/', - 'http://code.google.com/apis/codesearch/', - 'http://code.google.com/apis/contact/', - 'http://code.google.com/apis/books/', - 'http://code.google.com/apis/documents/', - 'http://code.google.com/apis/finance/', - 'http://code.google.com/apis/health/', - 'http://code.google.com/apis/notebook/', - 'http://code.google.com/apis/picasaweb/', - 'http://code.google.com/apis/spreadsheets/', - 'http://code.google.com/apis/webmastertools/', - 'http://code.google.com/apis/youtube/', -] - - -def urllib_get(url_list): - assert url_list - for url in url_list: - now = time.time() - r = urllib.urlopen(url) - elapsed = time.time() - now - print("Got in %0.3f: %s" % (elapsed, url)) - - -def pool_get(url_list): - assert url_list - pool = urllib3.connection_from_url(url_list[0]) - for url in url_list: - now = time.time() - r = pool.get_url(url) - elapsed = time.time() - now - print("Got in %0.3fs: %s" % (elapsed, url)) - - 
-if __name__ == '__main__': - print("Running pool_get ...") - now = time.time() - pool_get(TO_DOWNLOAD) - pool_elapsed = time.time() - now - - print("Running urllib_get ...") - now = time.time() - urllib_get(TO_DOWNLOAD) - urllib_elapsed = time.time() - now - - print("Completed pool_get in %0.3fs" % pool_elapsed) - print("Completed urllib_get in %0.3fs" % urllib_elapsed) - - -""" -Example results: - -Completed pool_get in 1.163s -Completed urllib_get in 2.318s -""" diff --git a/test/test_collections.py b/test/test_collections.py index b44c58a..4d173ac 100644 --- a/test/test_collections.py +++ b/test/test_collections.py @@ -1,6 +1,9 @@ import unittest -from urllib3._collections import RecentlyUsedContainer as Container +from urllib3._collections import ( + HTTPHeaderDict, + RecentlyUsedContainer as Container +) from urllib3.packages import six xrange = six.moves.xrange @@ -121,5 +124,57 @@ class TestLRUContainer(unittest.TestCase): self.assertRaises(NotImplementedError, d.__iter__) + +class TestHTTPHeaderDict(unittest.TestCase): + def setUp(self): + self.d = HTTPHeaderDict(A='foo') + self.d.add('a', 'bar') + + def test_overwriting_with_setitem_replaces(self): + d = HTTPHeaderDict() + + d['A'] = 'foo' + self.assertEqual(d['a'], 'foo') + + d['a'] = 'bar' + self.assertEqual(d['A'], 'bar') + + def test_copy(self): + h = self.d.copy() + self.assertTrue(self.d is not h) + self.assertEqual(self.d, h) + + def test_add(self): + d = HTTPHeaderDict() + + d['A'] = 'foo' + d.add('a', 'bar') + + self.assertEqual(d['a'], 'foo, bar') + self.assertEqual(d['A'], 'foo, bar') + + def test_getlist(self): + self.assertEqual(self.d.getlist('a'), ['foo', 'bar']) + self.assertEqual(self.d.getlist('A'), ['foo', 'bar']) + self.assertEqual(self.d.getlist('b'), []) + + def test_delitem(self): + del self.d['a'] + self.assertFalse('a' in self.d) + self.assertFalse('A' in self.d) + + def test_equal(self): + b = HTTPHeaderDict({'a': 'foo, bar'}) + self.assertEqual(self.d, b) + c = [('a', 'foo, 
bar')] + self.assertNotEqual(self.d, c) + + def test_len(self): + self.assertEqual(len(self.d), 1) + + def test_repr(self): + rep = "HTTPHeaderDict({'A': 'foo, bar'})" + self.assertEqual(repr(self.d), rep) + if __name__ == '__main__': unittest.main() diff --git a/test/test_compatibility.py b/test/test_compatibility.py new file mode 100644 index 0000000..05ee4de --- /dev/null +++ b/test/test_compatibility.py @@ -0,0 +1,23 @@ +import unittest +import warnings + +from urllib3.connection import HTTPConnection + + +class TestVersionCompatibility(unittest.TestCase): + def test_connection_strict(self): + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + + # strict=True is deprecated in Py33+ + conn = HTTPConnection('localhost', 12345, strict=True) + + if w: + self.fail('HTTPConnection raised warning on strict=True: %r' % w[0].message) + + def test_connection_source_address(self): + try: + # source_address does not exist in Py26- + conn = HTTPConnection('localhost', 12345, source_address='127.0.0.1') + except TypeError as e: + self.fail('HTTPConnection raised TypeError on source_adddress: %r' % e) diff --git a/test/test_connectionpool.py b/test/test_connectionpool.py index ac1768e..02229cf 100644 --- a/test/test_connectionpool.py +++ b/test/test_connectionpool.py @@ -13,10 +13,9 @@ from urllib3.exceptions import ( HostChangedError, MaxRetryError, SSLError, - ReadTimeoutError, ) -from socket import error as SocketError, timeout as SocketTimeout +from socket import error as SocketError from ssl import SSLError as BaseSSLError try: # Python 3 @@ -39,6 +38,11 @@ class TestConnectionPool(unittest.TestCase): ('http://google.com/', 'http://google.com'), ('http://google.com/', 'http://google.com/abra/cadabra'), ('http://google.com:42/', 'http://google.com:42/abracadabra'), + # Test comparison using default ports + ('http://google.com:80/', 'http://google.com/abracadabra'), + ('http://google.com/', 'http://google.com:80/abracadabra'), + 
('https://google.com:443/', 'https://google.com/abracadabra'), + ('https://google.com/', 'https://google.com:443/abracadabra'), ] for a, b in same_host: @@ -51,11 +55,22 @@ class TestConnectionPool(unittest.TestCase): ('http://yahoo.com/', 'http://google.com/'), ('http://google.com:42', 'https://google.com/abracadabra'), ('http://google.com', 'https://google.net/'), + # Test comparison with default ports + ('http://google.com:42', 'http://google.com'), + ('https://google.com:42', 'https://google.com'), + ('http://google.com:443', 'http://google.com'), + ('https://google.com:80', 'https://google.com'), + ('http://google.com:443', 'https://google.com'), + ('https://google.com:80', 'http://google.com'), + ('https://google.com:443', 'http://google.com'), + ('http://google.com:80', 'https://google.com'), ] for a, b in not_same_host: c = connection_from_url(a) self.assertFalse(c.is_same_host(b), "%s =? %s" % (a, b)) + c = connection_from_url(b) + self.assertFalse(c.is_same_host(a), "%s =? %s" % (b, a)) def test_max_connections(self): @@ -128,9 +143,8 @@ class TestConnectionPool(unittest.TestCase): self.assertEqual(pool.pool.qsize(), POOL_SIZE) - #make sure that all of the exceptions return the connection to the pool - _test(Empty, ReadTimeoutError) - _test(SocketTimeout, ReadTimeoutError) + # Make sure that all of the exceptions return the connection to the pool + _test(Empty, EmptyPoolError) _test(BaseSSLError, SSLError) _test(CertificateError, SSLError) diff --git a/test/test_exceptions.py b/test/test_exceptions.py index e20649b..4190a61 100644 --- a/test/test_exceptions.py +++ b/test/test_exceptions.py @@ -11,25 +11,36 @@ from urllib3.connectionpool import HTTPConnectionPool class TestPickle(unittest.TestCase): - def cycle(self, item): + def verify_pickling(self, item): return pickle.loads(pickle.dumps(item)) def test_exceptions(self): - assert self.cycle(HTTPError(None)) - assert self.cycle(MaxRetryError(None, None, None)) - assert 
self.cycle(LocationParseError(None)) - assert self.cycle(ConnectTimeoutError(None)) + assert self.verify_pickling(HTTPError(None)) + assert self.verify_pickling(MaxRetryError(None, None, None)) + assert self.verify_pickling(LocationParseError(None)) + assert self.verify_pickling(ConnectTimeoutError(None)) def test_exceptions_with_objects(self): - assert self.cycle(HTTPError('foo')) - assert self.cycle(MaxRetryError(HTTPConnectionPool('localhost'), - '/', None)) - assert self.cycle(LocationParseError('fake location')) - assert self.cycle(ClosedPoolError(HTTPConnectionPool('localhost'), - None)) - assert self.cycle(EmptyPoolError(HTTPConnectionPool('localhost'), - None)) - assert self.cycle(HostChangedError(HTTPConnectionPool('localhost'), - '/', None)) - assert self.cycle(ReadTimeoutError(HTTPConnectionPool('localhost'), - '/', None)) + assert self.verify_pickling( + HTTPError('foo')) + + assert self.verify_pickling( + HTTPError('foo', IOError('foo'))) + + assert self.verify_pickling( + MaxRetryError(HTTPConnectionPool('localhost'), '/', None)) + + assert self.verify_pickling( + LocationParseError('fake location')) + + assert self.verify_pickling( + ClosedPoolError(HTTPConnectionPool('localhost'), None)) + + assert self.verify_pickling( + EmptyPoolError(HTTPConnectionPool('localhost'), None)) + + assert self.verify_pickling( + HostChangedError(HTTPConnectionPool('localhost'), '/', None)) + + assert self.verify_pickling( + ReadTimeoutError(HTTPConnectionPool('localhost'), '/', None)) diff --git a/test/test_fields.py b/test/test_fields.py index 888c2d5..cdec68b 100644 --- a/test/test_fields.py +++ b/test/test_fields.py @@ -1,34 +1,39 @@ import unittest from urllib3.fields import guess_content_type, RequestField -from urllib3.packages.six import b, u +from urllib3.packages.six import u class TestRequestField(unittest.TestCase): def test_guess_content_type(self): - self.assertEqual(guess_content_type('image.jpg'), 'image/jpeg') - 
self.assertEqual(guess_content_type('notsure'), 'application/octet-stream') - self.assertEqual(guess_content_type(None), 'application/octet-stream') + self.assertTrue(guess_content_type('image.jpg') in + ['image/jpeg', 'image/pjpeg']) + self.assertEqual(guess_content_type('notsure'), + 'application/octet-stream') + self.assertEqual(guess_content_type(None), 'application/octet-stream') def test_create(self): - simple_field = RequestField('somename', 'data') - self.assertEqual(simple_field.render_headers(), '\r\n') - filename_field = RequestField('somename', 'data', filename='somefile.txt') - self.assertEqual(filename_field.render_headers(), '\r\n') - headers_field = RequestField('somename', 'data', headers={'Content-Length': 4}) - self.assertEqual(headers_field.render_headers(), - 'Content-Length: 4\r\n' - '\r\n') + simple_field = RequestField('somename', 'data') + self.assertEqual(simple_field.render_headers(), '\r\n') + filename_field = RequestField('somename', 'data', + filename='somefile.txt') + self.assertEqual(filename_field.render_headers(), '\r\n') + headers_field = RequestField('somename', 'data', + headers={'Content-Length': 4}) + self.assertEqual( + headers_field.render_headers(), 'Content-Length: 4\r\n\r\n') def test_make_multipart(self): - field = RequestField('somename', 'data') - field.make_multipart(content_type='image/jpg', content_location='/test') - self.assertEqual(field.render_headers(), - 'Content-Disposition: form-data; name="somename"\r\n' - 'Content-Type: image/jpg\r\n' - 'Content-Location: /test\r\n' - '\r\n') + field = RequestField('somename', 'data') + field.make_multipart(content_type='image/jpg', + content_location='/test') + self.assertEqual( + field.render_headers(), + 'Content-Disposition: form-data; name="somename"\r\n' + 'Content-Type: image/jpg\r\n' + 'Content-Location: /test\r\n' + '\r\n') def test_render_parts(self): field = RequestField('somename', 'data') diff --git a/test/test_filepost.py b/test/test_filepost.py index 
ca33d61..390dbb3 100644 --- a/test/test_filepost.py +++ b/test/test_filepost.py @@ -124,7 +124,7 @@ class TestMultipartEncoding(unittest.TestCase): encoded, content_type = encode_multipart_formdata(fields, boundary=BOUNDARY) - self.assertEquals(encoded, + self.assertEqual(encoded, b'--' + b(BOUNDARY) + b'\r\n' b'Content-Type: image/jpeg\r\n' b'\r\n' diff --git a/test/test_response.py b/test/test_response.py index 90d34eb..ecfcbee 100644 --- a/test/test_response.py +++ b/test/test_response.py @@ -5,6 +5,25 @@ from io import BytesIO, BufferedReader from urllib3.response import HTTPResponse from urllib3.exceptions import DecodeError + +from base64 import b64decode + +# A known random (i.e, not-too-compressible) payload generated with: +# "".join(random.choice(string.printable) for i in xrange(512)) +# .encode("zlib").encode("base64") +# Randomness in tests == bad, and fixing a seed may not be sufficient. +ZLIB_PAYLOAD = b64decode(b"""\ +eJwFweuaoQAAANDfineQhiKLUiaiCzvuTEmNNlJGiL5QhnGpZ99z8luQfe1AHoMioB+QSWHQu/L+ +lzd7W5CipqYmeVTBjdgSATdg4l4Z2zhikbuF+EKn69Q0DTpdmNJz8S33odfJoVEexw/l2SS9nFdi +pis7KOwXzfSqarSo9uJYgbDGrs1VNnQpT9f8zAorhYCEZronZQF9DuDFfNK3Hecc+WHLnZLQptwk +nufw8S9I43sEwxsT71BiqedHo0QeIrFE01F/4atVFXuJs2yxIOak3bvtXjUKAA6OKnQJ/nNvDGKZ +Khe5TF36JbnKVjdcL1EUNpwrWVfQpFYJ/WWm2b74qNeSZeQv5/xBhRdOmKTJFYgO96PwrHBlsnLn +a3l0LwJsloWpMbzByU5WLbRE6X5INFqjQOtIwYz5BAlhkn+kVqJvWM5vBlfrwP42ifonM5yF4ciJ +auHVks62997mNGOsM7WXNG3P98dBHPo2NhbTvHleL0BI5dus2JY81MUOnK3SGWLH8HeWPa1t5KcW +S5moAj5HexY/g/F8TctpxwsvyZp38dXeLDjSQvEQIkF7XR3YXbeZgKk3V34KGCPOAeeuQDIgyVhV +nP4HF2uWHA==""") + + class TestLegacyResponse(unittest.TestCase): def test_getheaders(self): headers = {'host': 'example.com'} @@ -167,6 +186,23 @@ class TestResponse(unittest.TestCase): self.assertEqual(next(stream), b'o') self.assertRaises(StopIteration, next, stream) + def test_streaming_tell(self): + fp = BytesIO(b'foo') + resp = HTTPResponse(fp, preload_content=False) + stream = resp.stream(2, decode_content=False) + + 
position = 0 + + position += len(next(stream)) + self.assertEqual(2, position) + self.assertEqual(position, resp.tell()) + + position += len(next(stream)) + self.assertEqual(3, position) + self.assertEqual(position, resp.tell()) + + self.assertRaises(StopIteration, next, stream) + def test_gzipped_streaming(self): import zlib compress = zlib.compressobj(6, zlib.DEFLATED, 16 + zlib.MAX_WBITS) @@ -182,6 +218,78 @@ class TestResponse(unittest.TestCase): self.assertEqual(next(stream), b'oo') self.assertRaises(StopIteration, next, stream) + def test_gzipped_streaming_tell(self): + import zlib + compress = zlib.compressobj(6, zlib.DEFLATED, 16 + zlib.MAX_WBITS) + uncompressed_data = b'foo' + data = compress.compress(uncompressed_data) + data += compress.flush() + + fp = BytesIO(data) + resp = HTTPResponse(fp, headers={'content-encoding': 'gzip'}, + preload_content=False) + stream = resp.stream() + + # Read everything + payload = next(stream) + self.assertEqual(payload, uncompressed_data) + + self.assertEqual(len(data), resp.tell()) + + self.assertRaises(StopIteration, next, stream) + + def test_deflate_streaming_tell_intermediate_point(self): + # Ensure that ``tell()`` returns the correct number of bytes when + # part-way through streaming compressed content. + import zlib + + NUMBER_OF_READS = 10 + + class MockCompressedDataReading(BytesIO): + """ + A ByteIO-like reader returning ``payload`` in ``NUMBER_OF_READS`` + calls to ``read``. + """ + + def __init__(self, payload, payload_part_size): + self.payloads = [ + payload[i*payload_part_size:(i+1)*payload_part_size] + for i in range(NUMBER_OF_READS+1)] + + assert b"".join(self.payloads) == payload + + def read(self, _): + # Amount is unused. 
+ if len(self.payloads) > 0: + return self.payloads.pop(0) + return b"" + + uncompressed_data = zlib.decompress(ZLIB_PAYLOAD) + + payload_part_size = len(ZLIB_PAYLOAD) // NUMBER_OF_READS + fp = MockCompressedDataReading(ZLIB_PAYLOAD, payload_part_size) + resp = HTTPResponse(fp, headers={'content-encoding': 'deflate'}, + preload_content=False) + stream = resp.stream() + + parts_positions = [(part, resp.tell()) for part in stream] + end_of_stream = resp.tell() + + self.assertRaises(StopIteration, next, stream) + + parts, positions = zip(*parts_positions) + + # Check that the payload is equal to the uncompressed data + payload = b"".join(parts) + self.assertEqual(uncompressed_data, payload) + + # Check that the positions in the stream are correct + expected = [(i+1)*payload_part_size for i in range(NUMBER_OF_READS)] + self.assertEqual(expected, list(positions)) + + # Check that the end of the stream is in the correct place + self.assertEqual(len(ZLIB_PAYLOAD), end_of_stream) + def test_deflate_streaming(self): import zlib data = zlib.compress(b'foo') @@ -244,6 +352,11 @@ class TestResponse(unittest.TestCase): self.assertEqual(next(stream), b'o') self.assertRaises(StopIteration, next, stream) + def test_get_case_insensitive_headers(self): + headers = {'host': 'example.com'} + r = HTTPResponse(headers=headers) + self.assertEqual(r.headers.get('host'), 'example.com') + self.assertEqual(r.headers.get('Host'), 'example.com') if __name__ == '__main__': unittest.main() diff --git a/test/test_util.py b/test/test_util.py index b465fef..ebd3b5f 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -64,7 +64,7 @@ class TestUtil(unittest.TestCase): } for url, expected_host in url_host_map.items(): returned_host = get_host(url) - self.assertEquals(returned_host, expected_host) + self.assertEqual(returned_host, expected_host) def test_invalid_host(self): # TODO: Add more tests @@ -85,6 +85,8 @@ class TestUtil(unittest.TestCase): 'http://google.com/': Url('http', 
host='google.com', path='/'), 'http://google.com': Url('http', host='google.com'), 'http://google.com?foo': Url('http', host='google.com', path='', query='foo'), + + # Path/query/fragment '': Url(), '/': Url(path='/'), '?': Url(path='', query=''), @@ -93,10 +95,22 @@ class TestUtil(unittest.TestCase): '/foo': Url(path='/foo'), '/foo?bar=baz': Url(path='/foo', query='bar=baz'), '/foo?bar=baz#banana?apple/orange': Url(path='/foo', query='bar=baz', fragment='banana?apple/orange'), + + # Port + 'http://google.com/': Url('http', host='google.com', path='/'), + 'http://google.com:80/': Url('http', host='google.com', port=80, path='/'), + 'http://google.com:/': Url('http', host='google.com', path='/'), + 'http://google.com:80': Url('http', host='google.com', port=80), + 'http://google.com:': Url('http', host='google.com'), + + # Auth + 'http://foo:bar@localhost/': Url('http', auth='foo:bar', host='localhost', path='/'), + 'http://foo@localhost/': Url('http', auth='foo', host='localhost', path='/'), + 'http://foo:bar@baz@localhost/': Url('http', auth='foo:bar@baz', host='localhost', path='/'), } for url, expected_url in url_host_map.items(): returned_url = parse_url(url) - self.assertEquals(returned_url, expected_url) + self.assertEqual(returned_url, expected_url) def test_parse_url_invalid_IPv6(self): self.assertRaises(ValueError, parse_url, '[::1') @@ -115,7 +129,7 @@ class TestUtil(unittest.TestCase): } for url, expected_request_uri in url_host_map.items(): returned_url = parse_url(url) - self.assertEquals(returned_url.request_uri, expected_request_uri) + self.assertEqual(returned_url.request_uri, expected_request_uri) def test_netloc(self): url_netloc_map = { @@ -126,7 +140,7 @@ class TestUtil(unittest.TestCase): } for url, expected_netloc in url_netloc_map.items(): - self.assertEquals(parse_url(url).netloc, expected_netloc) + self.assertEqual(parse_url(url).netloc, expected_netloc) def test_make_headers(self): self.assertEqual( @@ -157,6 +171,9 @@ class 
TestUtil(unittest.TestCase): make_headers(basic_auth='foo:bar'), {'authorization': 'Basic Zm9vOmJhcg=='}) + self.assertEqual( + make_headers(proxy_basic_auth='foo:bar'), + {'proxy-authorization': 'Basic Zm9vOmJhcg=='}) def test_split_first(self): test_cases = { @@ -250,6 +267,9 @@ class TestUtil(unittest.TestCase): self.assertEqual(timeout.read_timeout, None) self.assertEqual(timeout.total, None) + timeout = Timeout(5) + self.assertEqual(timeout.total, 5) + def test_timeout_str(self): timeout = Timeout(connect=1, read=2, total=3) diff --git a/urllib3.egg-info/PKG-INFO b/urllib3.egg-info/PKG-INFO index a81ab9c..6a4f31a 100644 --- a/urllib3.egg-info/PKG-INFO +++ b/urllib3.egg-info/PKG-INFO @@ -1,6 +1,6 @@ -Metadata-Version: 1.0 +Metadata-Version: 1.1 Name: urllib3 -Version: 1.7.1 +Version: 1.8 Summary: HTTP library with thread-safe connection pooling, file post, and more. Home-page: http://urllib3.readthedocs.org/ Author: Andrey Petrov @@ -121,10 +121,62 @@ Description: ======= Changes ======= + 1.8 (2014-03-04) + ++++++++++++++++ + + * Improved url parsing in ``urllib3.util.parse_url`` (properly parse '@' in + username, and blank ports like 'hostname:'). + + * New ``urllib3.connection`` module which contains all the HTTPConnection + objects. + + * Several ``urllib3.util.Timeout``-related fixes. Also changed constructor + signature to a more sensible order. [Backwards incompatible] + (Issues #252, #262, #263) + + * Use ``backports.ssl_match_hostname`` if it's installed. (Issue #274) + + * Added ``.tell()`` method to ``urllib3.response.HTTPResponse`` which + returns the number of bytes read so far. (Issue #277) + + * Support for platforms without threading. (Issue #289) + + * Expand default-port comparison in ``HTTPConnectionPool.is_same_host`` + to allow a pool with no specified port to be considered equal to to an + HTTP/HTTPS url with port 80/443 explicitly provided. (Issue #305) + + * Improved default SSL/TLS settings to avoid vulnerabilities. 
+ (Issue #309) + + * Fixed ``urllib3.poolmanager.ProxyManager`` not retrying on connect errors. + (Issue #310) + + * Disable Nagle's Algorithm on the socket for non-proxies. A subset of requests + will send the entire HTTP request ~200 milliseconds faster; however, some of + the resulting TCP packets will be smaller. (Issue #254) + + * Increased maximum number of SubjectAltNames in ``urllib3.contrib.pyopenssl`` + from the default 64 to 1024 in a single certificate. (Issue #318) + + * Headers are now passed and stored as a custom + ``urllib3.collections_.HTTPHeaderDict`` object rather than a plain ``dict``. + (Issue #329, #333) + + * Headers no longer lose their case on Python 3. (Issue #236) + + * ``urllib3.contrib.pyopenssl`` now uses the operating system's default CA + certificates on inject. (Issue #332) + + * Requests with ``retries=False`` will immediately raise any exceptions without + wrapping them in ``MaxRetryError``. (Issue #348) + + * Fixed open socket leak with SSL-related failures. (Issue #344, #348) + + 1.7.1 (2013-09-25) ++++++++++++++++++ - * Added granular timeout support with new `urllib3.util.Timeout` class. + * Added granular timeout support with new ``urllib3.util.Timeout`` class. (Issue #231) * Fixed Python 3.4 support. 
(Issue #238) diff --git a/urllib3.egg-info/SOURCES.txt b/urllib3.egg-info/SOURCES.txt index 32759d9..a5170fb 100644 --- a/urllib3.egg-info/SOURCES.txt +++ b/urllib3.egg-info/SOURCES.txt @@ -11,9 +11,8 @@ dummyserver/handlers.py dummyserver/proxy.py dummyserver/server.py dummyserver/testcase.py -test/__init__.py -test/benchmark.py test/test_collections.py +test/test_compatibility.py test/test_connectionpool.py test/test_exceptions.py test/test_fields.py @@ -24,6 +23,7 @@ test/test_response.py test/test_util.py urllib3/__init__.py urllib3/_collections.py +urllib3/connection.py urllib3/connectionpool.py urllib3/exceptions.py urllib3/fields.py @@ -42,4 +42,5 @@ urllib3/contrib/pyopenssl.py urllib3/packages/__init__.py urllib3/packages/ordered_dict.py urllib3/packages/six.py -urllib3/packages/ssl_match_hostname/__init__.py \ No newline at end of file +urllib3/packages/ssl_match_hostname/__init__.py +urllib3/packages/ssl_match_hostname/_implementation.py \ No newline at end of file diff --git a/urllib3/__init__.py b/urllib3/__init__.py index eed7006..086387f 100644 --- a/urllib3/__init__.py +++ b/urllib3/__init__.py @@ -10,7 +10,7 @@ urllib3 - Thread-safe connection pooling and re-using. 
__author__ = 'Andrey Petrov (andrey.petrov@shazow.net)' __license__ = 'MIT' -__version__ = '1.7.1' +__version__ = '1.8' from .connectionpool import ( diff --git a/urllib3/_collections.py b/urllib3/_collections.py index 282b8d5..9cea3a4 100644 --- a/urllib3/_collections.py +++ b/urllib3/_collections.py @@ -4,16 +4,26 @@ # This module is part of urllib3 and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php -from collections import MutableMapping -from threading import RLock +from collections import Mapping, MutableMapping +try: + from threading import RLock +except ImportError: # Platform-specific: No threads available + class RLock: + def __enter__(self): + pass + + def __exit__(self, exc_type, exc_value, traceback): + pass + try: # Python 2.7+ from collections import OrderedDict except ImportError: from .packages.ordered_dict import OrderedDict +from .packages.six import itervalues -__all__ = ['RecentlyUsedContainer'] +__all__ = ['RecentlyUsedContainer', 'HTTPHeaderDict'] _Null = object() @@ -92,3 +102,104 @@ class RecentlyUsedContainer(MutableMapping): def keys(self): with self.lock: return self._container.keys() + + +class HTTPHeaderDict(MutableMapping): + """ + :param headers: + An iterable of field-value pairs. Must not contain multiple field names + when compared case-insensitively. + + :param kwargs: + Additional field-value pairs to pass in to ``dict.update``. + + A ``dict`` like container for storing HTTP Headers. + + Field names are stored and compared case-insensitively in compliance with + RFC 2616. Iteration provides the first case-sensitive key seen for each + case-insensitive pair. + + Using ``__setitem__`` syntax overwrites fields that compare equal + case-insensitively in order to maintain ``dict``'s api. For fields that + compare equal, instead create a new ``HTTPHeaderDict`` and use ``.add`` + in a loop. 
+ + If multiple fields that are equal case-insensitively are passed to the + constructor or ``.update``, the behavior is undefined and some will be + lost. + + >>> headers = HTTPHeaderDict() + >>> headers.add('Set-Cookie', 'foo=bar') + >>> headers.add('set-cookie', 'baz=quxx') + >>> headers['content-length'] = '7' + >>> headers['SET-cookie'] + 'foo=bar, baz=quxx' + >>> headers['Content-Length'] + '7' + + If you want to access the raw headers with their original casing + for debugging purposes you can access the private ``._data`` attribute + which is a normal python ``dict`` that maps the case-insensitive key to a + list of tuples stored as (case-sensitive-original-name, value). Using the + structure from above as our example: + + >>> headers._data + {'set-cookie': [('Set-Cookie', 'foo=bar'), ('set-cookie', 'baz=quxx')], + 'content-length': [('content-length', '7')]} + """ + + def __init__(self, headers=None, **kwargs): + self._data = {} + if headers is None: + headers = {} + self.update(headers, **kwargs) + + def add(self, key, value): + """Adds a (name, value) pair, doesn't overwrite the value if it already + exists. + + >>> headers = HTTPHeaderDict(foo='bar') + >>> headers.add('Foo', 'baz') + >>> headers['foo'] + 'bar, baz' + """ + self._data.setdefault(key.lower(), []).append((key, value)) + + def getlist(self, key): + """Returns a list of all the values for the named field. 
Returns an + empty list if the key doesn't exist.""" + return self[key].split(', ') if key in self else [] + + def copy(self): + h = HTTPHeaderDict() + for key in self._data: + for rawkey, value in self._data[key]: + h.add(rawkey, value) + return h + + def __eq__(self, other): + if not isinstance(other, Mapping): + return False + other = HTTPHeaderDict(other) + return dict((k1, self[k1]) for k1 in self._data) == \ + dict((k2, other[k2]) for k2 in other._data) + + def __getitem__(self, key): + values = self._data[key.lower()] + return ', '.join(value[1] for value in values) + + def __setitem__(self, key, value): + self._data[key.lower()] = [(key, value)] + + def __delitem__(self, key): + del self._data[key.lower()] + + def __len__(self): + return len(self._data) + + def __iter__(self): + for headers in itervalues(self._data): + yield headers[0][0] + + def __repr__(self): + return '%s(%r)' % (self.__class__.__name__, dict(self.items())) diff --git a/urllib3/connection.py b/urllib3/connection.py new file mode 100644 index 0000000..662bd2e --- /dev/null +++ b/urllib3/connection.py @@ -0,0 +1,195 @@ +# urllib3/connection.py +# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# +# This module is part of urllib3 and is released under +# the MIT License: http://www.opensource.org/licenses/mit-license.php + +import sys +import socket +from socket import timeout as SocketTimeout + +try: # Python 3 + from http.client import HTTPConnection as _HTTPConnection, HTTPException +except ImportError: + from httplib import HTTPConnection as _HTTPConnection, HTTPException + +class DummyConnection(object): + "Used to detect a failed ConnectionCls import." + pass + +try: # Compiled with SSL? 
+ ssl = None + HTTPSConnection = DummyConnection + + class BaseSSLError(BaseException): + pass + + try: # Python 3 + from http.client import HTTPSConnection as _HTTPSConnection + except ImportError: + from httplib import HTTPSConnection as _HTTPSConnection + + import ssl + BaseSSLError = ssl.SSLError + +except (ImportError, AttributeError): # Platform-specific: No SSL. + pass + +from .exceptions import ( + ConnectTimeoutError, +) +from .packages.ssl_match_hostname import match_hostname +from .packages import six +from .util import ( + assert_fingerprint, + resolve_cert_reqs, + resolve_ssl_version, + ssl_wrap_socket, +) + + +port_by_scheme = { + 'http': 80, + 'https': 443, +} + + +class HTTPConnection(_HTTPConnection, object): + """ + Based on httplib.HTTPConnection but provides an extra constructor + backwards-compatibility layer between older and newer Pythons. + """ + + default_port = port_by_scheme['http'] + + # By default, disable Nagle's Algorithm. + tcp_nodelay = 1 + + def __init__(self, *args, **kw): + if six.PY3: # Python 3 + kw.pop('strict', None) + + if sys.version_info < (2, 7): # Python 2.6 and earlier + kw.pop('source_address', None) + self.source_address = None + + _HTTPConnection.__init__(self, *args, **kw) + + def _new_conn(self): + """ Establish a socket connection and set nodelay settings on it + + :return: a new socket connection + """ + extra_args = [] + if self.source_address: # Python 2.7+ + extra_args.append(self.source_address) + + conn = socket.create_connection( + (self.host, self.port), + self.timeout, + *extra_args + ) + conn.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, + self.tcp_nodelay) + return conn + + def _prepare_conn(self, conn): + self.sock = conn + if self._tunnel_host: + # TODO: Fix tunnel so it doesn't depend on self.sock state. 
+ self._tunnel() + + def connect(self): + conn = self._new_conn() + self._prepare_conn(conn) + + +class HTTPSConnection(HTTPConnection): + default_port = port_by_scheme['https'] + + def __init__(self, host, port=None, key_file=None, cert_file=None, + strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, + source_address=None): + + HTTPConnection.__init__(self, host, port, + strict=strict, + timeout=timeout, + source_address=source_address) + + self.key_file = key_file + self.cert_file = cert_file + + def connect(self): + conn = self._new_conn() + self._prepare_conn(conn) + self.sock = ssl.wrap_socket(conn, self.key_file, self.cert_file) + + +class VerifiedHTTPSConnection(HTTPSConnection): + """ + Based on httplib.HTTPSConnection but wraps the socket with + SSL certification. + """ + cert_reqs = None + ca_certs = None + ssl_version = None + + def set_cert(self, key_file=None, cert_file=None, + cert_reqs=None, ca_certs=None, + assert_hostname=None, assert_fingerprint=None): + + self.key_file = key_file + self.cert_file = cert_file + self.cert_reqs = cert_reqs + self.ca_certs = ca_certs + self.assert_hostname = assert_hostname + self.assert_fingerprint = assert_fingerprint + + def connect(self): + # Add certificate verification + try: + sock = socket.create_connection( + address=(self.host, self.port), + timeout=self.timeout, + ) + except SocketTimeout: + raise ConnectTimeoutError( + self, "Connection to %s timed out. (connect timeout=%s)" % + (self.host, self.timeout)) + + sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, + self.tcp_nodelay) + + resolved_cert_reqs = resolve_cert_reqs(self.cert_reqs) + resolved_ssl_version = resolve_ssl_version(self.ssl_version) + + # the _tunnel_host attribute was added in python 2.6.3 (via + # http://hg.python.org/cpython/rev/0f57b30a152f) so pythons 2.6(0-2) do + # not have them. + if getattr(self, '_tunnel_host', None): + self.sock = sock + # Calls self._set_hostport(), so self.host is + # self._tunnel_host below. 
+ self._tunnel() + + # Wrap socket using verification with the root certs in + # trusted_root_certs + self.sock = ssl_wrap_socket(sock, self.key_file, self.cert_file, + cert_reqs=resolved_cert_reqs, + ca_certs=self.ca_certs, + server_hostname=self.host, + ssl_version=resolved_ssl_version) + + if resolved_cert_reqs != ssl.CERT_NONE: + if self.assert_fingerprint: + assert_fingerprint(self.sock.getpeercert(binary_form=True), + self.assert_fingerprint) + elif self.assert_hostname is not False: + match_hostname(self.sock.getpeercert(), + self.assert_hostname or self.host) + + +if ssl: + # Make a copy for testing. + UnverifiedHTTPSConnection = HTTPSConnection + HTTPSConnection = VerifiedHTTPSConnection diff --git a/urllib3/connectionpool.py b/urllib3/connectionpool.py index 691d4e2..6d0dbb1 100644 --- a/urllib3/connectionpool.py +++ b/urllib3/connectionpool.py @@ -10,13 +10,6 @@ import logging from socket import error as SocketError, timeout as SocketTimeout import socket -try: # Python 3 - from http.client import HTTPConnection, HTTPException - from http.client import HTTP_PORT, HTTPS_PORT -except ImportError: - from httplib import HTTPConnection, HTTPException - from httplib import HTTP_PORT, HTTPS_PORT - try: # Python 3 from queue import LifoQueue, Empty, Full except ImportError: @@ -24,121 +17,42 @@ except ImportError: import Queue as _ # Platform-specific: Windows -try: # Compiled with SSL? - HTTPSConnection = object - - class BaseSSLError(BaseException): - pass - - ssl = None - - try: # Python 3 - from http.client import HTTPSConnection - except ImportError: - from httplib import HTTPSConnection - - import ssl - BaseSSLError = ssl.SSLError - -except (ImportError, AttributeError): # Platform-specific: No SSL. 
- pass - - from .exceptions import ( ClosedPoolError, + ConnectionError, ConnectTimeoutError, EmptyPoolError, HostChangedError, MaxRetryError, SSLError, + TimeoutError, ReadTimeoutError, ProxyError, ) -from .packages.ssl_match_hostname import CertificateError, match_hostname +from .packages.ssl_match_hostname import CertificateError from .packages import six +from .connection import ( + port_by_scheme, + DummyConnection, + HTTPConnection, HTTPSConnection, VerifiedHTTPSConnection, + HTTPException, BaseSSLError, +) from .request import RequestMethods from .response import HTTPResponse from .util import ( assert_fingerprint, get_host, is_connection_dropped, - resolve_cert_reqs, - resolve_ssl_version, - ssl_wrap_socket, Timeout, ) + xrange = six.moves.xrange log = logging.getLogger(__name__) _Default = object() -port_by_scheme = { - 'http': HTTP_PORT, - 'https': HTTPS_PORT, -} - - -## Connection objects (extension of httplib) - -class VerifiedHTTPSConnection(HTTPSConnection): - """ - Based on httplib.HTTPSConnection but wraps the socket with - SSL certification. - """ - cert_reqs = None - ca_certs = None - ssl_version = None - - def set_cert(self, key_file=None, cert_file=None, - cert_reqs=None, ca_certs=None, - assert_hostname=None, assert_fingerprint=None): - - self.key_file = key_file - self.cert_file = cert_file - self.cert_reqs = cert_reqs - self.ca_certs = ca_certs - self.assert_hostname = assert_hostname - self.assert_fingerprint = assert_fingerprint - - def connect(self): - # Add certificate verification - try: - sock = socket.create_connection( - address=(self.host, self.port), - timeout=self.timeout) - except SocketTimeout: - raise ConnectTimeoutError( - self, "Connection to %s timed out. 
(connect timeout=%s)" % - (self.host, self.timeout)) - - resolved_cert_reqs = resolve_cert_reqs(self.cert_reqs) - resolved_ssl_version = resolve_ssl_version(self.ssl_version) - - if self._tunnel_host: - self.sock = sock - # Calls self._set_hostport(), so self.host is - # self._tunnel_host below. - self._tunnel() - - # Wrap socket using verification with the root certs in - # trusted_root_certs - self.sock = ssl_wrap_socket(sock, self.key_file, self.cert_file, - cert_reqs=resolved_cert_reqs, - ca_certs=self.ca_certs, - server_hostname=self.host, - ssl_version=resolved_ssl_version) - - if resolved_cert_reqs != ssl.CERT_NONE: - if self.assert_fingerprint: - assert_fingerprint(self.sock.getpeercert(binary_form=True), - self.assert_fingerprint) - elif self.assert_hostname is not False: - match_hostname(self.sock.getpeercert(), - self.assert_hostname or self.host) - - ## Pool objects class ConnectionPool(object): @@ -218,6 +132,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): """ scheme = 'http' + ConnectionCls = HTTPConnection def __init__(self, host, port=None, strict=False, timeout=Timeout.DEFAULT_TIMEOUT, maxsize=1, block=False, @@ -250,19 +165,20 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): def _new_conn(self): """ - Return a fresh :class:`httplib.HTTPConnection`. + Return a fresh :class:`HTTPConnection`. """ self.num_connections += 1 log.info("Starting new HTTP connection (%d): %s" % (self.num_connections, self.host)) - extra_params = {} - if not six.PY3: # Python 2 - extra_params['strict'] = self.strict - - return HTTPConnection(host=self.host, port=self.port, - timeout=self.timeout.connect_timeout, - **extra_params) + conn = self.ConnectionCls(host=self.host, port=self.port, + timeout=self.timeout.connect_timeout, + strict=self.strict) + if self.proxy is not None: + # Enable Nagle's algorithm for proxies, to avoid packet + # fragmentation. 
+ conn.tcp_nodelay = 0 + return conn def _get_conn(self, timeout=None): """ @@ -319,8 +235,9 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): pass except Full: # This should never happen if self.block == True - log.warning("HttpConnectionPool is full, discarding connection: %s" - % self.host) + log.warning( + "Connection pool is full, discarding connection: %s" % + self.host) # Connection never got put back into the pool, close it. if conn: @@ -341,7 +258,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): def _make_request(self, conn, method, url, timeout=_Default, **httplib_request_kw): """ - Perform a request on a given httplib connection object taken from our + Perform a request on a given urllib connection object taken from our pool. :param conn: @@ -362,7 +279,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): timeout_obj.start_connect() conn.timeout = timeout_obj.connect_timeout # conn.request() calls httplib.*.request, not the method in - # request.py. It also calls makefile (recv) on the socket + # urllib3.request. It also calls makefile (recv) on the socket. conn.request(method, url, **httplib_request_kw) except SocketTimeout: raise ConnectTimeoutError( @@ -371,11 +288,9 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): # Reset the timeout for the recv() on the socket read_timeout = timeout_obj.read_timeout - log.debug("Setting read timeout to %s" % read_timeout) + # App Engine doesn't have a sock attr - if hasattr(conn, 'sock') and \ - read_timeout is not None and \ - read_timeout is not Timeout.DEFAULT_TIMEOUT: + if hasattr(conn, 'sock'): # In Python 3 socket.py will catch EAGAIN and return None when you # try and read into the file pointer created by http.client, which # instead raises a BadStatusLine exception. Instead of catching @@ -385,7 +300,10 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): raise ReadTimeoutError( self, url, "Read timed out. 
(read timeout=%s)" % read_timeout) - conn.sock.settimeout(read_timeout) + if read_timeout is Timeout.DEFAULT_TIMEOUT: + conn.sock.settimeout(socket.getdefaulttimeout()) + else: # None or a value + conn.sock.settimeout(read_timeout) # Receive the response from the server try: @@ -397,6 +315,16 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): raise ReadTimeoutError( self, url, "Read timed out. (read timeout=%s)" % read_timeout) + except BaseSSLError as e: + # Catch possible read timeouts thrown as SSL errors. If not the + # case, rethrow the original. We need to do this because of: + # http://bugs.python.org/issue10272 + if 'timed out' in str(e) or \ + 'did not complete (read)' in str(e): # Python 2.6 + raise ReadTimeoutError(self, url, "Read timed out.") + + raise + except SocketError as e: # Platform-specific: Python 2 # See the above comment about EAGAIN in Python 3. In Python 2 we # have to specifically catch it and throw the timeout error @@ -404,8 +332,8 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): raise ReadTimeoutError( self, url, "Read timed out. (read timeout=%s)" % read_timeout) - raise + raise # AppEngine doesn't have a version attr. http_version = getattr(conn, '_http_vsn_str', 'HTTP/?') @@ -441,9 +369,11 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): # TODO: Add optional support for socket.gethostbyname checking. scheme, host, port = get_host(url) + # Use explicit default port for comparison when none is given if self.port and not port: - # Use explicit default port for comparison when none is given. port = port_by_scheme.get(scheme) + elif not self.port and port == port_by_scheme.get(scheme): + port = None return (scheme, host, port) == (self.scheme, self.host, self.port) @@ -482,10 +412,13 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): :param retries: Number of retries to allow before raising a MaxRetryError exception. 
+ If `False`, then retries are disabled and any exception is raised + immediately. :param redirect: If True, automatically handle redirects (status codes 301, 302, - 303, 307, 308). Each redirect counts as a retry. + 303, 307, 308). Each redirect counts as a retry. Disabling retries + will disable redirect, too. :param assert_same_host: If ``True``, will make sure that the host of the pool requests is @@ -519,7 +452,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): if headers is None: headers = self.headers - if retries < 0: + if retries < 0 and retries is not False: raise MaxRetryError(self, url) if release_conn is None: @@ -531,6 +464,17 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): conn = None + # Merge the proxy headers. Only do this in HTTP. We have to copy the + # headers dict so we can safely change it without those changes being + # reflected in anyone else's copy. + if self.scheme == 'http': + headers = headers.copy() + headers.update(self.proxy_headers) + + # Must keep the exception bound to a separate variable or else Python 3 + # complains about UnboundLocalError. + err = None + try: # Request a connection from the queue conn = self._get_conn(timeout=pool_timeout) @@ -558,38 +502,41 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): # ``response.read()``) except Empty: - # Timed out by queue - raise ReadTimeoutError( - self, url, "Read timed out, no pool connections are available.") + # Timed out by queue. 
+ raise EmptyPoolError(self, "No pool connections are available.") - except SocketTimeout: - # Timed out by socket - raise ReadTimeoutError(self, url, "Read timed out.") - - except BaseSSLError as e: - # SSL certificate error - if 'timed out' in str(e) or \ - 'did not complete (read)' in str(e): # Platform-specific: Python 2.6 - raise ReadTimeoutError(self, url, "Read timed out.") + except (BaseSSLError, CertificateError) as e: + # Release connection unconditionally because there is no way to + # close it externally in case of exception. + release_conn = True raise SSLError(e) - except CertificateError as e: - # Name mismatch - raise SSLError(e) + except (TimeoutError, HTTPException, SocketError) as e: + if conn: + # Discard the connection for these exceptions. It will be + # be replaced during the next _get_conn() call. + conn.close() + conn = None - except (HTTPException, SocketError) as e: - if isinstance(e, SocketError) and self.proxy is not None: - raise ProxyError('Cannot connect to proxy. ' - 'Socket error: %s.' % e) + if not retries: + if isinstance(e, TimeoutError): + # TimeoutError is exempt from MaxRetryError-wrapping. + # FIXME: ... Not sure why. Add a reason here. + raise - # Connection broken, discard. It will be replaced next _get_conn(). - conn = None - # This is necessary so we can access e below - err = e + # Wrap unexpected exceptions with the most appropriate + # module-level exception and re-raise. + if isinstance(e, SocketError) and self.proxy: + raise ProxyError('Cannot connect to proxy.', e) + + if retries is False: + raise ConnectionError('Connection failed.', e) - if retries == 0: raise MaxRetryError(self, url, e) + # Keep track of the error for the retry warning. + err = e + finally: if release_conn: # Put the connection back to be reused. 
If the connection is @@ -599,8 +546,8 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): if not conn: # Try again - log.warn("Retrying (%d attempts remain) after connection " - "broken by '%r': %s" % (retries, err, url)) + log.warning("Retrying (%d attempts remain) after connection " + "broken by '%r': %s" % (retries, err, url)) return self.urlopen(method, url, body, headers, retries - 1, redirect, assert_same_host, timeout=timeout, pool_timeout=pool_timeout, @@ -608,7 +555,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): # Handle redirect? redirect_location = redirect and response.get_redirect_location() - if redirect_location: + if redirect_location and retries is not False: if response.status == 303: method = 'GET' log.info("Redirecting %s -> %s" % (url, redirect_location)) @@ -626,7 +573,7 @@ class HTTPSConnectionPool(HTTPConnectionPool): When Python is compiled with the :mod:`ssl` module, then :class:`.VerifiedHTTPSConnection` is used, which *can* verify certificates, - instead of :class:`httplib.HTTPSConnection`. + instead of :class:`.HTTPSConnection`. :class:`.VerifiedHTTPSConnection` uses one of ``assert_fingerprint``, ``assert_hostname`` and ``host`` in this order to verify connections. @@ -639,6 +586,7 @@ class HTTPSConnectionPool(HTTPConnectionPool): """ scheme = 'https' + ConnectionCls = HTTPSConnection def __init__(self, host, port=None, strict=False, timeout=None, maxsize=1, @@ -658,33 +606,33 @@ class HTTPSConnectionPool(HTTPConnectionPool): self.assert_hostname = assert_hostname self.assert_fingerprint = assert_fingerprint - def _prepare_conn(self, connection): + def _prepare_conn(self, conn): """ Prepare the ``connection`` for :meth:`urllib3.util.ssl_wrap_socket` and establish the tunnel if proxy is used. 
""" - if isinstance(connection, VerifiedHTTPSConnection): - connection.set_cert(key_file=self.key_file, - cert_file=self.cert_file, - cert_reqs=self.cert_reqs, - ca_certs=self.ca_certs, - assert_hostname=self.assert_hostname, - assert_fingerprint=self.assert_fingerprint) - connection.ssl_version = self.ssl_version + if isinstance(conn, VerifiedHTTPSConnection): + conn.set_cert(key_file=self.key_file, + cert_file=self.cert_file, + cert_reqs=self.cert_reqs, + ca_certs=self.ca_certs, + assert_hostname=self.assert_hostname, + assert_fingerprint=self.assert_fingerprint) + conn.ssl_version = self.ssl_version if self.proxy is not None: # Python 2.7+ try: - set_tunnel = connection.set_tunnel + set_tunnel = conn.set_tunnel except AttributeError: # Platform-specific: Python 2.6 - set_tunnel = connection._set_tunnel + set_tunnel = conn._set_tunnel set_tunnel(self.host, self.port, self.proxy_headers) # Establish tunnel connection early, because otherwise httplib # would improperly set Host: header to proxy's IP:port. 
- connection.connect() + conn.connect() - return connection + return conn def _new_conn(self): """ @@ -694,28 +642,30 @@ class HTTPSConnectionPool(HTTPConnectionPool): log.info("Starting new HTTPS connection (%d): %s" % (self.num_connections, self.host)) + if not self.ConnectionCls or self.ConnectionCls is DummyConnection: + # Platform-specific: Python without ssl + raise SSLError("Can't connect to HTTPS URL because the SSL " + "module is not available.") + actual_host = self.host actual_port = self.port if self.proxy is not None: actual_host = self.proxy.host actual_port = self.proxy.port - if not ssl: # Platform-specific: Python compiled without +ssl - if not HTTPSConnection or HTTPSConnection is object: - raise SSLError("Can't connect to HTTPS URL because the SSL " - "module is not available.") - connection_class = HTTPSConnection - else: - connection_class = VerifiedHTTPSConnection - extra_params = {} if not six.PY3: # Python 2 extra_params['strict'] = self.strict - connection = connection_class(host=actual_host, port=actual_port, - timeout=self.timeout.connect_timeout, - **extra_params) - return self._prepare_conn(connection) + conn = self.ConnectionCls(host=actual_host, port=actual_port, + timeout=self.timeout.connect_timeout, + **extra_params) + if self.proxy is not None: + # Enable Nagle's algorithm for proxies, to avoid packet + # fragmentation. + conn.tcp_nodelay = 0 + + return self._prepare_conn(conn) def connection_from_url(url, **kw): diff --git a/urllib3/contrib/pyopenssl.py b/urllib3/contrib/pyopenssl.py index d43bcd6..7c513f3 100644 --- a/urllib3/contrib/pyopenssl.py +++ b/urllib3/contrib/pyopenssl.py @@ -1,4 +1,4 @@ -'''SSL with SNI-support for Python 2. +'''SSL with SNI_-support for Python 2. This needs the following packages installed: @@ -18,17 +18,36 @@ your application begins using ``urllib3``, like this:: Now you can use :mod:`urllib3` as you normally would, and it will support SNI when the required modules are installed. 
+ +Activating this module also has the positive side effect of disabling SSL/TLS +encryption in Python 2 (see `CRIME attack`_). + +If you want to configure the default list of supported cipher suites, you can +set the ``urllib3.contrib.pyopenssl.DEFAULT_SSL_CIPHER_LIST`` variable. + +Module Variables +---------------- + +:var DEFAULT_SSL_CIPHER_LIST: The list of supported SSL/TLS cipher suites. + Default: ``ECDH+AESGCM:DH+AESGCM:ECDH+AES256:DH+AES256:ECDH+AES128:DH+AES: + ECDH+3DES:DH+3DES:RSA+AESGCM:RSA+AES:RSA+3DES:!aNULL:!MD5:!DSS`` + +.. _sni: https://en.wikipedia.org/wiki/Server_Name_Indication +.. _crime attack: https://en.wikipedia.org/wiki/CRIME_(security_exploit) + ''' from ndg.httpsclient.ssl_peer_verification import SUBJ_ALT_NAME_SUPPORT -from ndg.httpsclient.subj_alt_name import SubjectAltName +from ndg.httpsclient.subj_alt_name import SubjectAltName as BaseSubjectAltName import OpenSSL.SSL from pyasn1.codec.der import decoder as der_decoder -from socket import _fileobject +from pyasn1.type import univ, constraint +from socket import _fileobject, timeout import ssl +import select from cStringIO import StringIO -from .. import connectionpool +from .. import connection from .. import util __all__ = ['inject_into_urllib3', 'extract_from_urllib3'] @@ -49,25 +68,53 @@ _openssl_verify = { + OpenSSL.SSL.VERIFY_FAIL_IF_NO_PEER_CERT, } +# A secure default. +# Sources for more information on TLS ciphers: +# +# - https://wiki.mozilla.org/Security/Server_Side_TLS +# - https://www.ssllabs.com/projects/best-practices/index.html +# - https://hynek.me/articles/hardening-your-web-servers-ssl-ciphers/ +# +# The general intent is: +# - Prefer cipher suites that offer perfect forward secrecy (DHE/ECDHE), +# - prefer ECDHE over DHE for better performance, +# - prefer any AES-GCM over any AES-CBC for better performance and security, +# - use 3DES as fallback which is secure but slow, +# - disable NULL authentication, MD5 MACs and DSS for security reasons. 
+DEFAULT_SSL_CIPHER_LIST = "ECDH+AESGCM:DH+AESGCM:ECDH+AES256:DH+AES256:" + \ + "ECDH+AES128:DH+AES:ECDH+3DES:DH+3DES:RSA+AESGCM:RSA+AES:RSA+3DES:" + \ + "!aNULL:!MD5:!DSS" + orig_util_HAS_SNI = util.HAS_SNI -orig_connectionpool_ssl_wrap_socket = connectionpool.ssl_wrap_socket +orig_connection_ssl_wrap_socket = connection.ssl_wrap_socket def inject_into_urllib3(): 'Monkey-patch urllib3 with PyOpenSSL-backed SSL-support.' - connectionpool.ssl_wrap_socket = ssl_wrap_socket + connection.ssl_wrap_socket = ssl_wrap_socket util.HAS_SNI = HAS_SNI def extract_from_urllib3(): 'Undo monkey-patching by :func:`inject_into_urllib3`.' - connectionpool.ssl_wrap_socket = orig_connectionpool_ssl_wrap_socket + connection.ssl_wrap_socket = orig_connection_ssl_wrap_socket util.HAS_SNI = orig_util_HAS_SNI +### Note: This is a slightly bug-fixed version of same from ndg-httpsclient. +class SubjectAltName(BaseSubjectAltName): + '''ASN.1 implementation for subjectAltNames support''' + + # There is no limit to how many SAN certificates a certificate may have, + # however this needs to have some limit so we'll set an arbitrarily high + # limit. + sizeSpec = univ.SequenceOf.sizeSpec + \ + constraint.ValueSizeConstraint(1, 1024) + + ### Note: This is a slightly bug-fixed version of same from ndg-httpsclient. def get_subj_alt_name(peer_cert): # Search through extensions @@ -101,6 +148,13 @@ def get_subj_alt_name(peer_cert): class fileobject(_fileobject): + def _wait_for_sock(self): + rd, wd, ed = select.select([self._sock], [], [], + self._sock.gettimeout()) + if not rd: + raise timeout() + + def read(self, size=-1): # Use max, disallow tiny reads in a loop as they are very inefficient. 
# We never leave read() with any leftover data from a new recv() call @@ -118,6 +172,7 @@ class fileobject(_fileobject): try: data = self._sock.recv(rbufsize) except OpenSSL.SSL.WantReadError: + self._wait_for_sock() continue if not data: break @@ -145,6 +200,7 @@ class fileobject(_fileobject): try: data = self._sock.recv(left) except OpenSSL.SSL.WantReadError: + self._wait_for_sock() continue if not data: break @@ -196,6 +252,7 @@ class fileobject(_fileobject): break buffers.append(data) except OpenSSL.SSL.WantReadError: + self._wait_for_sock() continue break return "".join(buffers) @@ -206,6 +263,7 @@ class fileobject(_fileobject): try: data = self._sock.recv(self._rbufsize) except OpenSSL.SSL.WantReadError: + self._wait_for_sock() continue if not data: break @@ -233,7 +291,8 @@ class fileobject(_fileobject): try: data = self._sock.recv(self._rbufsize) except OpenSSL.SSL.WantReadError: - continue + self._wait_for_sock() + continue if not data: break left = size - buf_len @@ -328,6 +387,15 @@ def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, ctx.load_verify_locations(ca_certs, None) except OpenSSL.SSL.Error as e: raise ssl.SSLError('bad ca_certs: %r' % ca_certs, e) + else: + ctx.set_default_verify_paths() + + # Disable TLS compression to migitate CRIME attack (issue #309) + OP_NO_COMPRESSION = 0x20000 + ctx.set_options(OP_NO_COMPRESSION) + + # Set list of supported ciphersuites. 
+ ctx.set_cipher_list(DEFAULT_SSL_CIPHER_LIST) cnx = OpenSSL.SSL.Connection(ctx, sock) cnx.set_tlsext_host_name(server_hostname) @@ -336,6 +404,7 @@ def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, try: cnx.do_handshake() except OpenSSL.SSL.WantReadError: + select.select([sock], [], []) continue except OpenSSL.SSL.Error as e: raise ssl.SSLError('bad handshake', e) diff --git a/urllib3/exceptions.py b/urllib3/exceptions.py index 98ef9ab..b4df831 100644 --- a/urllib3/exceptions.py +++ b/urllib3/exceptions.py @@ -44,6 +44,11 @@ class ProxyError(HTTPError): pass +class ConnectionError(HTTPError): + "Raised when a normal connection fails." + pass + + class DecodeError(HTTPError): "Raised when automatic decoding based on Content-Type fails." pass diff --git a/urllib3/filepost.py b/urllib3/filepost.py index 4575582..e8b30bd 100644 --- a/urllib3/filepost.py +++ b/urllib3/filepost.py @@ -46,16 +46,15 @@ def iter_field_objects(fields): def iter_fields(fields): """ - Iterate over fields. + .. deprecated:: 1.6 - .. deprecated :: + Iterate over fields. - The addition of `~urllib3.fields.RequestField` makes this function - obsolete. Instead, use :func:`iter_field_objects`, which returns - `~urllib3.fields.RequestField` objects, instead. + The addition of :class:`~urllib3.fields.RequestField` makes this function + obsolete. Instead, use :func:`iter_field_objects`, which returns + :class:`~urllib3.fields.RequestField` objects. Supports list of (k, v) tuples and dicts. 
- """ if isinstance(fields, dict): return ((k, v) for k, v in six.iteritems(fields)) diff --git a/urllib3/packages/ssl_match_hostname/__init__.py b/urllib3/packages/ssl_match_hostname/__init__.py index 2d61ac2..dd59a75 100644 --- a/urllib3/packages/ssl_match_hostname/__init__.py +++ b/urllib3/packages/ssl_match_hostname/__init__.py @@ -1,98 +1,13 @@ -"""The match_hostname() function from Python 3.2, essential when using SSL.""" - -import re - -__version__ = '3.2.2' - -class CertificateError(ValueError): - pass - -def _dnsname_match(dn, hostname, max_wildcards=1): - """Matching according to RFC 6125, section 6.4.3 - - http://tools.ietf.org/html/rfc6125#section-6.4.3 - """ - pats = [] - if not dn: - return False - - parts = dn.split(r'.') - leftmost = parts[0] - - wildcards = leftmost.count('*') - if wildcards > max_wildcards: - # Issue #17980: avoid denials of service by refusing more - # than one wildcard per fragment. A survery of established - # policy among SSL implementations showed it to be a - # reasonable choice. - raise CertificateError( - "too many wildcards in certificate DNS name: " + repr(dn)) - - # speed up common case w/o wildcards - if not wildcards: - return dn.lower() == hostname.lower() - - # RFC 6125, section 6.4.3, subitem 1. - # The client SHOULD NOT attempt to match a presented identifier in which - # the wildcard character comprises a label other than the left-most label. - if leftmost == '*': - # When '*' is a fragment by itself, it matches a non-empty dotless - # fragment. - pats.append('[^.]+') - elif leftmost.startswith('xn--') or hostname.startswith('xn--'): - # RFC 6125, section 6.4.3, subitem 3. - # The client SHOULD NOT attempt to match a presented identifier - # where the wildcard character is embedded within an A-label or - # U-label of an internationalized domain name. - pats.append(re.escape(leftmost)) - else: - # Otherwise, '*' matches any dotless string, e.g. 
www* - pats.append(re.escape(leftmost).replace(r'\*', '[^.]*')) - - # add the remaining fragments, ignore any wildcards - for frag in parts[1:]: - pats.append(re.escape(frag)) - - pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE) - return pat.match(hostname) - - -def match_hostname(cert, hostname): - """Verify that *cert* (in decoded format as returned by - SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 and RFC 6125 - rules are followed, but IP addresses are not accepted for *hostname*. - - CertificateError is raised on failure. On success, the function - returns nothing. - """ - if not cert: - raise ValueError("empty or no certificate") - dnsnames = [] - san = cert.get('subjectAltName', ()) - for key, value in san: - if key == 'DNS': - if _dnsname_match(value, hostname): - return - dnsnames.append(value) - if not dnsnames: - # The subject is only checked when there is no dNSName entry - # in subjectAltName - for sub in cert.get('subject', ()): - for key, value in sub: - # XXX according to RFC 2818, the most specific Common Name - # must be used. - if key == 'commonName': - if _dnsname_match(value, hostname): - return - dnsnames.append(value) - if len(dnsnames) > 1: - raise CertificateError("hostname %r " - "doesn't match either of %s" - % (hostname, ', '.join(map(repr, dnsnames)))) - elif len(dnsnames) == 1: - raise CertificateError("hostname %r " - "doesn't match %r" - % (hostname, dnsnames[0])) - else: - raise CertificateError("no appropriate commonName or " - "subjectAltName fields were found") +try: + # Python 3.2+ + from ssl import CertificateError, match_hostname +except ImportError: + try: + # Backport of the function from a pypi module + from backports.ssl_match_hostname import CertificateError, match_hostname + except ImportError: + # Our vendored copy + from ._implementation import CertificateError, match_hostname + +# Not needed, but documenting what we provide. 
+__all__ = ('CertificateError', 'match_hostname') diff --git a/urllib3/packages/ssl_match_hostname/_implementation.py b/urllib3/packages/ssl_match_hostname/_implementation.py new file mode 100644 index 0000000..52f4287 --- /dev/null +++ b/urllib3/packages/ssl_match_hostname/_implementation.py @@ -0,0 +1,105 @@ +"""The match_hostname() function from Python 3.3.3, essential when using SSL.""" + +# Note: This file is under the PSF license as the code comes from the python +# stdlib. http://docs.python.org/3/license.html + +import re + +__version__ = '3.4.0.2' + +class CertificateError(ValueError): + pass + + +def _dnsname_match(dn, hostname, max_wildcards=1): + """Matching according to RFC 6125, section 6.4.3 + + http://tools.ietf.org/html/rfc6125#section-6.4.3 + """ + pats = [] + if not dn: + return False + + # Ported from python3-syntax: + # leftmost, *remainder = dn.split(r'.') + parts = dn.split(r'.') + leftmost = parts[0] + remainder = parts[1:] + + wildcards = leftmost.count('*') + if wildcards > max_wildcards: + # Issue #17980: avoid denials of service by refusing more + # than one wildcard per fragment. A survey of established + # policy among SSL implementations showed it to be a + # reasonable choice. + raise CertificateError( + "too many wildcards in certificate DNS name: " + repr(dn)) + + # speed up common case w/o wildcards + if not wildcards: + return dn.lower() == hostname.lower() + + # RFC 6125, section 6.4.3, subitem 1. + # The client SHOULD NOT attempt to match a presented identifier in which + # the wildcard character comprises a label other than the left-most label. + if leftmost == '*': + # When '*' is a fragment by itself, it matches a non-empty dotless + # fragment. + pats.append('[^.]+') + elif leftmost.startswith('xn--') or hostname.startswith('xn--'): + # RFC 6125, section 6.4.3, subitem 3. 
+ # The client SHOULD NOT attempt to match a presented identifier + # where the wildcard character is embedded within an A-label or + # U-label of an internationalized domain name. + pats.append(re.escape(leftmost)) + else: + # Otherwise, '*' matches any dotless string, e.g. www* + pats.append(re.escape(leftmost).replace(r'\*', '[^.]*')) + + # add the remaining fragments, ignore any wildcards + for frag in remainder: + pats.append(re.escape(frag)) + + pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE) + return pat.match(hostname) + + +def match_hostname(cert, hostname): + """Verify that *cert* (in decoded format as returned by + SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 and RFC 6125 + rules are followed, but IP addresses are not accepted for *hostname*. + + CertificateError is raised on failure. On success, the function + returns nothing. + """ + if not cert: + raise ValueError("empty or no certificate") + dnsnames = [] + san = cert.get('subjectAltName', ()) + for key, value in san: + if key == 'DNS': + if _dnsname_match(value, hostname): + return + dnsnames.append(value) + if not dnsnames: + # The subject is only checked when there is no dNSName entry + # in subjectAltName + for sub in cert.get('subject', ()): + for key, value in sub: + # XXX according to RFC 2818, the most specific Common Name + # must be used. 
+ if key == 'commonName': + if _dnsname_match(value, hostname): + return + dnsnames.append(value) + if len(dnsnames) > 1: + raise CertificateError("hostname %r " + "doesn't match either of %s" + % (hostname, ', '.join(map(repr, dnsnames)))) + elif len(dnsnames) == 1: + raise CertificateError("hostname %r " + "doesn't match %r" + % (hostname, dnsnames[0])) + else: + raise CertificateError("no appropriate commonName or " + "subjectAltName fields were found") diff --git a/urllib3/poolmanager.py b/urllib3/poolmanager.py index e7f8667..f18ff2b 100644 --- a/urllib3/poolmanager.py +++ b/urllib3/poolmanager.py @@ -1,5 +1,5 @@ # urllib3/poolmanager.py -# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# Copyright 2008-2014 Andrey Petrov and contributors (see CONTRIBUTORS.txt) # # This module is part of urllib3 and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php @@ -176,7 +176,7 @@ class ProxyManager(PoolManager): Behaves just like :class:`PoolManager`, but sends all requests through the defined proxy, using the CONNECT method for HTTPS URLs. - :param poxy_url: + :param proxy_url: The URL of the proxy to be used. :param proxy_headers: @@ -245,12 +245,11 @@ class ProxyManager(PoolManager): u = parse_url(url) if u.scheme == "http": - # It's too late to set proxy headers on per-request basis for - # tunnelled HTTPS connections, should use - # constructor's proxy_headers instead. + # For proxied HTTPS requests, httplib sets the necessary headers + # on the CONNECT to the proxy. For HTTP, we'll definitely + # need to set 'Host' at the very least. 
kw['headers'] = self._set_proxy_headers(url, kw.get('headers', self.headers)) - kw['headers'].update(self.proxy_headers) return super(ProxyManager, self).urlopen(method, url, redirect, **kw) diff --git a/urllib3/request.py b/urllib3/request.py index 66a9a0e..2a92cc2 100644 --- a/urllib3/request.py +++ b/urllib3/request.py @@ -45,7 +45,6 @@ class RequestMethods(object): """ _encode_url_methods = set(['DELETE', 'GET', 'HEAD', 'OPTIONS']) - _encode_body_methods = set(['PATCH', 'POST', 'PUT', 'TRACE']) def __init__(self, headers=None): self.headers = headers or {} diff --git a/urllib3/response.py b/urllib3/response.py index 4efff5a..db44182 100644 --- a/urllib3/response.py +++ b/urllib3/response.py @@ -9,6 +9,7 @@ import logging import zlib import io +from ._collections import HTTPHeaderDict from .exceptions import DecodeError from .packages.six import string_types as basestring, binary_type from .util import is_fp_closed @@ -79,7 +80,10 @@ class HTTPResponse(io.IOBase): def __init__(self, body='', headers=None, status=0, version=0, reason=None, strict=0, preload_content=True, decode_content=True, original_response=None, pool=None, connection=None): - self.headers = headers or {} + + self.headers = HTTPHeaderDict() + if headers: + self.headers.update(headers) self.status = status self.version = version self.reason = reason @@ -90,6 +94,7 @@ class HTTPResponse(io.IOBase): self._body = body if body and isinstance(body, basestring) else None self._fp = None self._original_response = original_response + self._fp_bytes_read = 0 self._pool = pool self._connection = connection @@ -129,6 +134,14 @@ class HTTPResponse(io.IOBase): if self._fp: return self.read(cache_content=True) + def tell(self): + """ + Obtain the number of bytes pulled over the wire so far. May differ from + the amount of content returned by :meth:``HTTPResponse.read`` if bytes + are encoded on the wire (e.g, compressed). 
+ """ + return self._fp_bytes_read + def read(self, amt=None, decode_content=None, cache_content=False): """ Similar to :meth:`httplib.HTTPResponse.read`, but with two additional @@ -183,6 +196,8 @@ class HTTPResponse(io.IOBase): self._fp.close() flush_decoder = True + self._fp_bytes_read += len(data) + try: if decode_content and self._decoder: data = self._decoder.decompress(data) @@ -238,17 +253,9 @@ class HTTPResponse(io.IOBase): with ``original_response=r``. """ - # Normalize headers between different versions of Python - headers = {} + headers = HTTPHeaderDict() for k, v in r.getheaders(): - # Python 3: Header keys are returned capitalised - k = k.lower() - - has_value = headers.get(k) - if has_value: # Python 3: Repeating header keys are unmerged. - v = ', '.join([has_value, v]) - - headers[k] = v + headers.add(k, v) # HTTPResponse objects in Python 3 don't have a .strict attribute strict = getattr(r, 'strict', 0) diff --git a/urllib3/util.py b/urllib3/util.py index 266c9ed..bd26631 100644 --- a/urllib3/util.py +++ b/urllib3/util.py @@ -80,14 +80,13 @@ class Timeout(object): :type read: integer, float, or None :param total: - The maximum amount of time to wait for an HTTP request to connect and - return. This combines the connect and read timeouts into one. In the + This combines the connect and read timeouts into one; the read timeout + will be set to the time leftover from the connect attempt. In the event that both a connect timeout and a total are specified, or a read timeout and a total are specified, the shorter timeout will be applied. Defaults to None. - :type total: integer, float, or None .. note:: @@ -101,18 +100,23 @@ class Timeout(object): `total`. In addition, the read and total timeouts only measure the time between - read operations on the socket connecting the client and the server, not - the total amount of time for the request to return a complete response. 
- As an example, you may want a request to return within 7 seconds or - fail, so you set the ``total`` timeout to 7 seconds. If the server - sends one byte to you every 5 seconds, the request will **not** trigger - time out. This case is admittedly rare. + read operations on the socket connecting the client and the server, + not the total amount of time for the request to return a complete + response. For most requests, the timeout is raised because the server + has not sent the first byte in the specified time. This is not always + the case; if a server streams one byte every fifteen seconds, a timeout + of 20 seconds will not ever trigger, even though the request will + take several minutes to complete. + + If your goal is to cut off any request after a set amount of wall clock + time, consider having a second "watcher" thread to cut off a slow + request. """ #: A sentinel object representing the default timeout value DEFAULT_TIMEOUT = _GLOBAL_DEFAULT_TIMEOUT - def __init__(self, connect=_Default, read=_Default, total=None): + def __init__(self, total=None, connect=_Default, read=_Default): self._connect = self._validate_timeout(connect, 'connect') self._read = self._validate_timeout(read, 'read') self.total = self._validate_timeout(total, 'total') @@ -372,7 +376,8 @@ def parse_url(url): # Auth if '@' in url: - auth, url = url.split('@', 1) + # Last '@' denotes end of auth part + auth, url = url.rsplit('@', 1) # IPv6 if url and url[0] == '[': @@ -386,10 +391,14 @@ def parse_url(url): if not host: host = _host - if not port.isdigit(): - raise LocationParseError("Failed to parse: %s" % url) - - port = int(port) + if port: + # If given, ports must be integers. + if not port.isdigit(): + raise LocationParseError("Failed to parse: %s" % url) + port = int(port) + else: + # Blank ports are cool, too. 
(rfc3986#section-3.2.3) + port = None elif not host and url: host = url @@ -417,7 +426,7 @@ def get_host(url): def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, - basic_auth=None): + basic_auth=None, proxy_basic_auth=None): """ Shortcuts for generating request headers. @@ -438,6 +447,10 @@ def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, Colon-separated username:password string for 'authorization: basic ...' auth header. + :param proxy_basic_auth: + Colon-separated username:password string for 'proxy-authorization: basic ...' + auth header. + Example: :: >>> make_headers(keep_alive=True, user_agent="Batman/1.0") @@ -465,6 +478,10 @@ def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, headers['authorization'] = 'Basic ' + \ b64encode(six.b(basic_auth)).decode('utf-8') + if proxy_basic_auth: + headers['proxy-authorization'] = 'Basic ' + \ + b64encode(six.b(proxy_basic_auth)).decode('utf-8') + return headers @@ -603,6 +620,11 @@ if SSLContext is not None: # Python 3.2+ """ context = SSLContext(ssl_version) context.verify_mode = cert_reqs + + # Disable TLS compression to migitate CRIME attack (issue #309) + OP_NO_COMPRESSION = 0x20000 + context.options |= OP_NO_COMPRESSION + if ca_certs: try: context.load_verify_locations(ca_certs) -- cgit v1.2.3 From 35fb123b995cbbe27d3edd5ed14abc6e56b7ad13 Mon Sep 17 00:00:00 2001 From: SVN-Git Migration Date: Thu, 8 Oct 2015 13:19:37 -0700 Subject: Imported Upstream version 1.8.2 --- CHANGES.rst | 17 + CONTRIBUTORS.txt | 6 + MANIFEST.in | 1 + PKG-INFO | 19 +- dummyserver/__init__.pyc | Bin 0 -> 141 bytes dummyserver/__pycache__/__init__.cpython-33.pyc | Bin 0 -> 149 bytes dummyserver/__pycache__/handlers.cpython-33.pyc | Bin 0 -> 10341 bytes dummyserver/__pycache__/proxy.cpython-33.pyc | Bin 0 -> 5809 bytes dummyserver/__pycache__/server.cpython-33.pyc | Bin 0 -> 6928 bytes dummyserver/__pycache__/testcase.cpython-33.pyc | Bin 0 -> 7203 bytes 
dummyserver/certs/cacert.key | 15 + dummyserver/certs/cacert.pem | 23 + dummyserver/certs/client.csr | 23 + dummyserver/certs/client.key | 15 + dummyserver/certs/client.pem | 22 + dummyserver/certs/client_bad.pem | 17 + dummyserver/certs/server.crt | 22 + dummyserver/certs/server.csr | 22 + dummyserver/certs/server.key | 15 + dummyserver/certs/server.key.org | 12 + dummyserver/handlers.py | 4 + dummyserver/handlers.pyc | Bin 0 -> 8967 bytes dummyserver/proxy.pyc | Bin 0 -> 4740 bytes dummyserver/server.pyc | Bin 0 -> 5992 bytes dummyserver/testcase.pyc | Bin 0 -> 5090 bytes setup.py | 4 +- test/test_poolmanager.py | 8 +- test/test_util.py | 6 +- urllib3.egg-info/PKG-INFO | 19 +- urllib3.egg-info/SOURCES.txt | 30 +- urllib3.egg-info/top_level.txt | 1 - urllib3/__init__.py | 2 +- urllib3/connection.py | 42 +- urllib3/connectionpool.py | 25 +- urllib3/contrib/pyopenssl.py | 17 +- urllib3/util.py | 648 ------------------------ urllib3/util/__init__.py | 27 + urllib3/util/connection.py | 45 ++ urllib3/util/request.py | 68 +++ urllib3/util/response.py | 13 + urllib3/util/ssl_.py | 133 +++++ urllib3/util/timeout.py | 234 +++++++++ urllib3/util/url.py | 162 ++++++ 43 files changed, 1029 insertions(+), 688 deletions(-) create mode 100644 dummyserver/__init__.pyc create mode 100644 dummyserver/__pycache__/__init__.cpython-33.pyc create mode 100644 dummyserver/__pycache__/handlers.cpython-33.pyc create mode 100644 dummyserver/__pycache__/proxy.cpython-33.pyc create mode 100644 dummyserver/__pycache__/server.cpython-33.pyc create mode 100644 dummyserver/__pycache__/testcase.cpython-33.pyc create mode 100644 dummyserver/certs/cacert.key create mode 100644 dummyserver/certs/cacert.pem create mode 100644 dummyserver/certs/client.csr create mode 100644 dummyserver/certs/client.key create mode 100644 dummyserver/certs/client.pem create mode 100644 dummyserver/certs/client_bad.pem create mode 100644 dummyserver/certs/server.crt create mode 100644 dummyserver/certs/server.csr create 
mode 100644 dummyserver/certs/server.key create mode 100644 dummyserver/certs/server.key.org create mode 100644 dummyserver/handlers.pyc create mode 100644 dummyserver/proxy.pyc create mode 100644 dummyserver/server.pyc create mode 100644 dummyserver/testcase.pyc delete mode 100644 urllib3/util.py create mode 100644 urllib3/util/__init__.py create mode 100644 urllib3/util/connection.py create mode 100644 urllib3/util/request.py create mode 100644 urllib3/util/response.py create mode 100644 urllib3/util/ssl_.py create mode 100644 urllib3/util/timeout.py create mode 100644 urllib3/util/url.py diff --git a/CHANGES.rst b/CHANGES.rst index ae63682..3f836e9 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,23 @@ Changes ======= +1.8.2 (2014-04-17) +++++++++++++++++++ + +* Fix ``urllib3.util`` not being included in the package. + + +1.8.1 (2014-04-17) +++++++++++++++++++ + +* Fix AppEngine bug of HTTPS requests going out as HTTP. (Issue #356) + +* Don't install ``dummyserver`` into ``site-packages`` as it's only needed + for the test suite. (Issue #362) + +* Added support for specifying ``source_address``. 
(Issue #352) + + 1.8 (2014-03-04) ++++++++++++++++ diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt index e2dba35..e6178f1 100644 --- a/CONTRIBUTORS.txt +++ b/CONTRIBUTORS.txt @@ -111,5 +111,11 @@ In chronological order: * HTTPHeaderDict and associated tests and docs * Bugfixes, docs, test coverage +* Tahia Khan + * Added Timeout examples in docs + +* Arthur Grunseid + * source_address support and tests (with https://github.com/bui) + * [Your name or handle] <[email or website]> * [Brief summary of your changes] diff --git a/MANIFEST.in b/MANIFEST.in index d1abae2..3f344d1 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1 +1,2 @@ include README.rst CHANGES.rst LICENSE.txt CONTRIBUTORS.txt test-requirements.txt +recursive-include dummyserver *.* diff --git a/PKG-INFO b/PKG-INFO index 6a4f31a..0021e34 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 1.1 Name: urllib3 -Version: 1.8 +Version: 1.8.2 Summary: HTTP library with thread-safe connection pooling, file post, and more. Home-page: http://urllib3.readthedocs.org/ Author: Andrey Petrov @@ -121,6 +121,23 @@ Description: ======= Changes ======= + 1.8.2 (2014-04-17) + ++++++++++++++++++ + + * Fix ``urllib3.util`` not being included in the package. + + + 1.8.1 (2014-04-17) + ++++++++++++++++++ + + * Fix AppEngine bug of HTTPS requests going out as HTTP. (Issue #356) + + * Don't install ``dummyserver`` into ``site-packages`` as it's only needed + for the test suite. (Issue #362) + + * Added support for specifying ``source_address``. 
(Issue #352) + + 1.8 (2014-03-04) ++++++++++++++++ diff --git a/dummyserver/__init__.pyc b/dummyserver/__init__.pyc new file mode 100644 index 0000000..b017ac5 Binary files /dev/null and b/dummyserver/__init__.pyc differ diff --git a/dummyserver/__pycache__/__init__.cpython-33.pyc b/dummyserver/__pycache__/__init__.cpython-33.pyc new file mode 100644 index 0000000..d1e84e3 Binary files /dev/null and b/dummyserver/__pycache__/__init__.cpython-33.pyc differ diff --git a/dummyserver/__pycache__/handlers.cpython-33.pyc b/dummyserver/__pycache__/handlers.cpython-33.pyc new file mode 100644 index 0000000..e3bab97 Binary files /dev/null and b/dummyserver/__pycache__/handlers.cpython-33.pyc differ diff --git a/dummyserver/__pycache__/proxy.cpython-33.pyc b/dummyserver/__pycache__/proxy.cpython-33.pyc new file mode 100644 index 0000000..4cca456 Binary files /dev/null and b/dummyserver/__pycache__/proxy.cpython-33.pyc differ diff --git a/dummyserver/__pycache__/server.cpython-33.pyc b/dummyserver/__pycache__/server.cpython-33.pyc new file mode 100644 index 0000000..49504c9 Binary files /dev/null and b/dummyserver/__pycache__/server.cpython-33.pyc differ diff --git a/dummyserver/__pycache__/testcase.cpython-33.pyc b/dummyserver/__pycache__/testcase.cpython-33.pyc new file mode 100644 index 0000000..21e3d10 Binary files /dev/null and b/dummyserver/__pycache__/testcase.cpython-33.pyc differ diff --git a/dummyserver/certs/cacert.key b/dummyserver/certs/cacert.key new file mode 100644 index 0000000..fc8be6e --- /dev/null +++ b/dummyserver/certs/cacert.key @@ -0,0 +1,15 @@ +-----BEGIN RSA PRIVATE KEY----- +MIICXgIBAAKBgQDKz8a9X2SfNms9TffyNaFO/K42fAjUI1dAM1G8TVoj0a81ay7W +z4R7V1zfjXFT/WoRW04Y6xek0bff0OtsW+AriooUy7+pPYnrchpAW0p7hPjH1DIB +Vab01CJMhQ24er92Q1dF4WBv4yKqEaV1IYz1cvqvCCJgAbsWn1I8Cna1lwIDAQAB +AoGAPpkK+oBrCkk9qFpcYUH0W/DZxK9b+j4+O+6bF8e4Pr4FmjNO7bZ3aap5W/bI +N+hLyLepzz8guRqR6l8NixCAi+JiVW/agh5o4Jrek8UJWQamwSL4nJ36U3Iw/l7w 
+vcN1txfkpsA2SB9QFPGfDKcP3+IZMOZ7uFLzk/gzgLYiCEECQQD+M5Lj+e/sNBkb +XeIBxWIrPfEeIkk4SDkqImzDjq1FcfxZkvfskqyJgUvcLe5hb+ibY8jqWvtpvFTI +5v/tzHvPAkEAzD8fNrGz8KiAVTo7+0vrb4AebAdSLZUvbp0AGs5pXUAuQx6VEgz8 +opNKpZjBwAFsZKlwhgDqaChiAt9aKUkzuQJBALlai9I2Dg7SkjgVRdX6wjE7slRB +tdgXOa+SeHJD1+5aRiJeeu8CqFJ/d/wtdbOQsTCVGwxfmREpZT00ywrvXpsCQQCU +gs1Kcrn5Ijx2PCrDFbfyUkFMoaIiXNipYGVkGHRKhtFcoo8YGfNUry7W7BTtbNuI +8h9MgLvw0nQ5zHf9jymZAkEA7o4uA6XSS1zUqEQ55bZRFHcz/99pLH35G906iwVb +d5rd1Z4Cf5s/91o5gwL6ZP2Ig34CCn+NSL4avgz6K0VUaA== +-----END RSA PRIVATE KEY----- diff --git a/dummyserver/certs/cacert.pem b/dummyserver/certs/cacert.pem new file mode 100644 index 0000000..38d32dc --- /dev/null +++ b/dummyserver/certs/cacert.pem @@ -0,0 +1,23 @@ +-----BEGIN CERTIFICATE----- +MIIDzDCCAzWgAwIBAgIJALPrscov4b/jMA0GCSqGSIb3DQEBBQUAMIGBMQswCQYD +VQQGEwJGSTEOMAwGA1UECBMFZHVtbXkxDjAMBgNVBAcTBWR1bW15MQ4wDAYDVQQK +EwVkdW1teTEOMAwGA1UECxMFZHVtbXkxETAPBgNVBAMTCFNuYWtlT2lsMR8wHQYJ +KoZIhvcNAQkBFhBkdW1teUB0ZXN0LmxvY2FsMB4XDTExMTIyMjA3NTYxNVoXDTIx +MTIxOTA3NTYxNVowgYExCzAJBgNVBAYTAkZJMQ4wDAYDVQQIEwVkdW1teTEOMAwG +A1UEBxMFZHVtbXkxDjAMBgNVBAoTBWR1bW15MQ4wDAYDVQQLEwVkdW1teTERMA8G +A1UEAxMIU25ha2VPaWwxHzAdBgkqhkiG9w0BCQEWEGR1bW15QHRlc3QubG9jYWww +gZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBAMrPxr1fZJ82az1N9/I1oU78rjZ8 +CNQjV0AzUbxNWiPRrzVrLtbPhHtXXN+NcVP9ahFbThjrF6TRt9/Q62xb4CuKihTL +v6k9ietyGkBbSnuE+MfUMgFVpvTUIkyFDbh6v3ZDV0XhYG/jIqoRpXUhjPVy+q8I +ImABuxafUjwKdrWXAgMBAAGjggFIMIIBRDAdBgNVHQ4EFgQUGXd/I2JiQllF+3Wd +x3NyBLszCi0wgbYGA1UdIwSBrjCBq4AUGXd/I2JiQllF+3Wdx3NyBLszCi2hgYek +gYQwgYExCzAJBgNVBAYTAkZJMQ4wDAYDVQQIEwVkdW1teTEOMAwGA1UEBxMFZHVt +bXkxDjAMBgNVBAoTBWR1bW15MQ4wDAYDVQQLEwVkdW1teTERMA8GA1UEAxMIU25h +a2VPaWwxHzAdBgkqhkiG9w0BCQEWEGR1bW15QHRlc3QubG9jYWyCCQCz67HKL+G/ +4zAPBgNVHRMBAf8EBTADAQH/MBEGCWCGSAGG+EIBAQQEAwIBBjAJBgNVHRIEAjAA +MCsGCWCGSAGG+EIBDQQeFhxUaW55Q0EgR2VuZXJhdGVkIENlcnRpZmljYXRlMA4G +A1UdDwEB/wQEAwICBDANBgkqhkiG9w0BAQUFAAOBgQBnnwtO8onsyhGOvS6cS8af 
+IRZyAXgouuPeP3Zrf5W80iZcV23u94969sPEIsD8Ujv5u0hUSrToGl4ahOMEOFNL +R5ndQOkh3VsepJnoE+RklZzbHWxU8onWlVzsNBFbclxidzaU3UHmdgXJAJL5nVSd +Zpn44QSS0UXsaC0mBimVNw== +-----END CERTIFICATE----- diff --git a/dummyserver/certs/client.csr b/dummyserver/certs/client.csr new file mode 100644 index 0000000..703d351 --- /dev/null +++ b/dummyserver/certs/client.csr @@ -0,0 +1,23 @@ +-----BEGIN CERTIFICATE----- +MIID1TCCAz6gAwIBAgIBAjANBgkqhkiG9w0BAQUFADCBgTELMAkGA1UEBhMCRkkx +DjAMBgNVBAgTBWR1bW15MQ4wDAYDVQQHEwVkdW1teTEOMAwGA1UEChMFZHVtbXkx +DjAMBgNVBAsTBWR1bW15MREwDwYDVQQDEwhTbmFrZU9pbDEfMB0GCSqGSIb3DQEJ +ARYQZHVtbXlAdGVzdC5sb2NhbDAeFw0xMTEyMjIwNzU5NTlaFw0yMTEyMTgwNzU5 +NTlaMH8xCzAJBgNVBAYTAkZJMQ4wDAYDVQQIEwVkdW1teTEOMAwGA1UEBxMFZHVt +bXkxDjAMBgNVBAoTBWR1bW15MQ4wDAYDVQQLEwVkdW1teTEPMA0GA1UEAxMGY2xp +ZW50MR8wHQYJKoZIhvcNAQkBFhBjbGllbnRAbG9jYWxob3N0MIGfMA0GCSqGSIb3 +DQEBAQUAA4GNADCBiQKBgQDaITA/XCzviqjex+lJJP+pgmQQ+ncUf+PDaFw86kWh +cWuI2eSBVaIaP6SsxYgIODQTjqYGjRogsd1Nvx3gRdIMEagTfVQyVwfDfNp8aT8v +SY/wDYFjsD07asmjGvwiu0sLp4t/tMz+x5ELlU4+hGnmPInH6hLK150DqgbNmJus +3wIDAQABo4IBXDCCAVgwCQYDVR0TBAIwADARBglghkgBhvhCAQEEBAMCBLAwKwYJ +YIZIAYb4QgENBB4WHFRpbnlDQSBHZW5lcmF0ZWQgQ2VydGlmaWNhdGUwHQYDVR0O +BBYEFG71FCU2yisH1GyrcqYaPKVeTWxBMIG2BgNVHSMEga4wgauAFBl3fyNiYkJZ +Rft1ncdzcgS7MwotoYGHpIGEMIGBMQswCQYDVQQGEwJGSTEOMAwGA1UECBMFZHVt +bXkxDjAMBgNVBAcTBWR1bW15MQ4wDAYDVQQKEwVkdW1teTEOMAwGA1UECxMFZHVt +bXkxETAPBgNVBAMTCFNuYWtlT2lsMR8wHQYJKoZIhvcNAQkBFhBkdW1teUB0ZXN0 +LmxvY2FsggkAs+uxyi/hv+MwCQYDVR0SBAIwADAbBgNVHREEFDASgRBjbGllbnRA +bG9jYWxob3N0MAsGA1UdDwQEAwIFoDANBgkqhkiG9w0BAQUFAAOBgQDEwZmp3yE8 +R4U9Ob/IeEo6O3p0T4o7GNvufGksM/mELmzyC+Qh/Ul6fNn+IhdKWpo61sMZou+n +eOufXVouc8dGhQ1Qi5s0i51d/ouhfYNs+AGRcpwEieVjZhgE1XfrNwvvjIx3yPtK +m9LSmCtVKcTWqOHQywKn+G83a+7bsh835Q== +-----END CERTIFICATE----- diff --git a/dummyserver/certs/client.key b/dummyserver/certs/client.key new file mode 100644 index 0000000..0d1c343 --- /dev/null +++ b/dummyserver/certs/client.key @@ -0,0 +1,15 @@ +-----BEGIN RSA PRIVATE KEY----- 
+MIICWwIBAAKBgQDaITA/XCzviqjex+lJJP+pgmQQ+ncUf+PDaFw86kWhcWuI2eSB +VaIaP6SsxYgIODQTjqYGjRogsd1Nvx3gRdIMEagTfVQyVwfDfNp8aT8vSY/wDYFj +sD07asmjGvwiu0sLp4t/tMz+x5ELlU4+hGnmPInH6hLK150DqgbNmJus3wIDAQAB +AoGAKMMg+AYqo4z+57rl/nQ6jpu+RWn4zMzlbEPZUMzavEOsu8M0L3MoOs1/4YV8 +WUTffnQe1ISTyF5Uo82+MIX7rUtfJITFSQrIWe7AGdm6Nir8TQQ7fD97modXyAUx +69I9SQjQlseg5PCRCp/DfcBncvHeYuf8gAJK5FfC1VW1cQECQQDvzFNoGrwnsrtm +4gj1Kt0c20jkIYFN6iQ6Sjs/1fk1cXDeWzjPaa92zF+i+02Ma/eWJ0ZVrhisw6sv +zxGp+ByBAkEA6N4SpuGWytJqCRfwenQZ4Oa8mNcVo5ulGf/eUHVXvHewWxQ7xWRi +iWUj/z1byR9+yno8Yfd04kaNCPYN/ICZXwJAAf5//xCh2e6pkkx06J0Ho7LLI2KH +8b7tuDJf1cMQxHoCB0dY7JijZeiDLxbJ6U4IjA4djp7ZA67I4KfnLLOsgQJARLZS +dp+WKR7RXwGLWfasNCqhd8/veKlSnEtdxAv76Ya/qQBdaq9mS/hmGMh4Lu52MTTE +YHvuJ159+yjvk5Q2rQJABjlU1+GZqwv/7QM7GxfJO+GPI4PHv5Yji5s7LLu2c6dL +XY2XiTHQL9PnPrKp3+qDDzxjyej30lfz4he6E5pI+g== +-----END RSA PRIVATE KEY----- diff --git a/dummyserver/certs/client.pem b/dummyserver/certs/client.pem new file mode 100644 index 0000000..29aea38 --- /dev/null +++ b/dummyserver/certs/client.pem @@ -0,0 +1,22 @@ +-----BEGIN CERTIFICATE----- +MIIDqDCCAxGgAwIBAgIBATANBgkqhkiG9w0BAQUFADCBgTELMAkGA1UEBhMCRkkx +DjAMBgNVBAgTBWR1bW15MQ4wDAYDVQQHEwVkdW1teTEOMAwGA1UEChMFZHVtbXkx +DjAMBgNVBAsTBWR1bW15MREwDwYDVQQDEwhTbmFrZU9pbDEfMB0GCSqGSIb3DQEJ +ARYQZHVtbXlAdGVzdC5sb2NhbDAeFw0xMTEyMjIwNzU4NDBaFw0yMTEyMTgwNzU4 +NDBaMGExCzAJBgNVBAYTAkZJMQ4wDAYDVQQIEwVkdW1teTEOMAwGA1UEBxMFZHVt +bXkxDjAMBgNVBAoTBWR1bW15MQ4wDAYDVQQLEwVkdW1teTESMBAGA1UEAxMJbG9j +YWxob3N0MIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQDXe3FqmCWvP8XPxqtT ++0bfL1Tvzvebi46k0WIcUV8bP3vyYiSRXG9ALmyzZH4GHY9UVs4OEDkCMDOBSezB +0y9ai/9doTNcaictdEBu8nfdXKoTtzrn+VX4UPrkH5hm7NQ1fTQuj1MR7yBCmYqN +3Q2Q+Efuujyx0FwBzAuy1aKYuwIDAQABo4IBTTCCAUkwCQYDVR0TBAIwADARBglg +hkgBhvhCAQEEBAMCBkAwKwYJYIZIAYb4QgENBB4WHFRpbnlDQSBHZW5lcmF0ZWQg +Q2VydGlmaWNhdGUwHQYDVR0OBBYEFBvnSuVKLNPEFMAFqHw292vGHGJSMIG2BgNV +HSMEga4wgauAFBl3fyNiYkJZRft1ncdzcgS7MwotoYGHpIGEMIGBMQswCQYDVQQG 
+EwJGSTEOMAwGA1UECBMFZHVtbXkxDjAMBgNVBAcTBWR1bW15MQ4wDAYDVQQKEwVk +dW1teTEOMAwGA1UECxMFZHVtbXkxETAPBgNVBAMTCFNuYWtlT2lsMR8wHQYJKoZI +hvcNAQkBFhBkdW1teUB0ZXN0LmxvY2FsggkAs+uxyi/hv+MwCQYDVR0SBAIwADAZ +BgNVHREEEjAQgQ5yb290QGxvY2FsaG9zdDANBgkqhkiG9w0BAQUFAAOBgQBXdedG +XHLPmOVBeKWjTmaekcaQi44snhYqE1uXRoIQXQsyw+Ya5+n/uRxPKZO/C78EESL0 +8rnLTdZXm4GBYyHYmMy0AdWR7y030viOzAkWWRRRbuecsaUzFCI+F9jTV5LHuRzz +V8fUKwiEE9swzkWgMpfVTPFuPgzxwG9gMbrBfg== +-----END CERTIFICATE----- diff --git a/dummyserver/certs/client_bad.pem b/dummyserver/certs/client_bad.pem new file mode 100644 index 0000000..e9402fb --- /dev/null +++ b/dummyserver/certs/client_bad.pem @@ -0,0 +1,17 @@ +-----BEGIN CERTIFICATE----- +MIICsDCCAhmgAwIBAgIJAL63Nc6KY94BMA0GCSqGSIb3DQEBBQUAMEUxCzAJBgNV +BAYTAkFVMRMwEQYDVQQIEwpTb21lLVN0YXRlMSEwHwYDVQQKExhJbnRlcm5ldCBX +aWRnaXRzIFB0eSBMdGQwHhcNMTExMDExMjMxMjAzWhcNMjExMDA4MjMxMjAzWjBF +MQswCQYDVQQGEwJBVTETMBEGA1UECBMKU29tZS1TdGF0ZTEhMB8GA1UEChMYSW50 +ZXJuZXQgV2lkZ2l0cyBQdHkgTHRkMIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKB +gQC8HGxvblJ4Z0i/lIlG8jrNsFrCqYRAXtj3xdnnjfUpd/kNhU/KahMsG6urAe/4 +Yj+Zqf1sVnt0Cye8FZE3cN9RAcwJrlTCRiicJiXEbA7cPfMphqNGqjVHtmxQ1OsU +NHK7cxKa9OX3xmg4h55vxSZYgibAEPO2g3ueGk7RWIAQ8wIDAQABo4GnMIGkMB0G +A1UdDgQWBBSeeo/YRpdn5DK6bUI7ZDJ57pzGdDB1BgNVHSMEbjBsgBSeeo/YRpdn +5DK6bUI7ZDJ57pzGdKFJpEcwRTELMAkGA1UEBhMCQVUxEzARBgNVBAgTClNvbWUt +U3RhdGUxITAfBgNVBAoTGEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZIIJAL63Nc6K +Y94BMAwGA1UdEwQFMAMBAf8wDQYJKoZIhvcNAQEFBQADgYEAOntoloMGt1325UR0 +GGEKQJbiRhLXY4otdgFjEvCG2RPZVLxWYhLMu0LkB6HBYULEuoy12ushtRWlhS1k +6PNRkaZ+LQTSREj6Do4c4zzLxCDmxYmejOz63cIWX2x5IY6qEx2BNOfmM4xEdF8W +LSGGbQfuAghiEh0giAi4AQloDlY= +-----END CERTIFICATE----- diff --git a/dummyserver/certs/server.crt b/dummyserver/certs/server.crt new file mode 100644 index 0000000..29aea38 --- /dev/null +++ b/dummyserver/certs/server.crt @@ -0,0 +1,22 @@ +-----BEGIN CERTIFICATE----- +MIIDqDCCAxGgAwIBAgIBATANBgkqhkiG9w0BAQUFADCBgTELMAkGA1UEBhMCRkkx 
+DjAMBgNVBAgTBWR1bW15MQ4wDAYDVQQHEwVkdW1teTEOMAwGA1UEChMFZHVtbXkx +DjAMBgNVBAsTBWR1bW15MREwDwYDVQQDEwhTbmFrZU9pbDEfMB0GCSqGSIb3DQEJ +ARYQZHVtbXlAdGVzdC5sb2NhbDAeFw0xMTEyMjIwNzU4NDBaFw0yMTEyMTgwNzU4 +NDBaMGExCzAJBgNVBAYTAkZJMQ4wDAYDVQQIEwVkdW1teTEOMAwGA1UEBxMFZHVt +bXkxDjAMBgNVBAoTBWR1bW15MQ4wDAYDVQQLEwVkdW1teTESMBAGA1UEAxMJbG9j +YWxob3N0MIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQDXe3FqmCWvP8XPxqtT ++0bfL1Tvzvebi46k0WIcUV8bP3vyYiSRXG9ALmyzZH4GHY9UVs4OEDkCMDOBSezB +0y9ai/9doTNcaictdEBu8nfdXKoTtzrn+VX4UPrkH5hm7NQ1fTQuj1MR7yBCmYqN +3Q2Q+Efuujyx0FwBzAuy1aKYuwIDAQABo4IBTTCCAUkwCQYDVR0TBAIwADARBglg +hkgBhvhCAQEEBAMCBkAwKwYJYIZIAYb4QgENBB4WHFRpbnlDQSBHZW5lcmF0ZWQg +Q2VydGlmaWNhdGUwHQYDVR0OBBYEFBvnSuVKLNPEFMAFqHw292vGHGJSMIG2BgNV +HSMEga4wgauAFBl3fyNiYkJZRft1ncdzcgS7MwotoYGHpIGEMIGBMQswCQYDVQQG +EwJGSTEOMAwGA1UECBMFZHVtbXkxDjAMBgNVBAcTBWR1bW15MQ4wDAYDVQQKEwVk +dW1teTEOMAwGA1UECxMFZHVtbXkxETAPBgNVBAMTCFNuYWtlT2lsMR8wHQYJKoZI +hvcNAQkBFhBkdW1teUB0ZXN0LmxvY2FsggkAs+uxyi/hv+MwCQYDVR0SBAIwADAZ +BgNVHREEEjAQgQ5yb290QGxvY2FsaG9zdDANBgkqhkiG9w0BAQUFAAOBgQBXdedG +XHLPmOVBeKWjTmaekcaQi44snhYqE1uXRoIQXQsyw+Ya5+n/uRxPKZO/C78EESL0 +8rnLTdZXm4GBYyHYmMy0AdWR7y030viOzAkWWRRRbuecsaUzFCI+F9jTV5LHuRzz +V8fUKwiEE9swzkWgMpfVTPFuPgzxwG9gMbrBfg== +-----END CERTIFICATE----- diff --git a/dummyserver/certs/server.csr b/dummyserver/certs/server.csr new file mode 100644 index 0000000..29aea38 --- /dev/null +++ b/dummyserver/certs/server.csr @@ -0,0 +1,22 @@ +-----BEGIN CERTIFICATE----- +MIIDqDCCAxGgAwIBAgIBATANBgkqhkiG9w0BAQUFADCBgTELMAkGA1UEBhMCRkkx +DjAMBgNVBAgTBWR1bW15MQ4wDAYDVQQHEwVkdW1teTEOMAwGA1UEChMFZHVtbXkx +DjAMBgNVBAsTBWR1bW15MREwDwYDVQQDEwhTbmFrZU9pbDEfMB0GCSqGSIb3DQEJ +ARYQZHVtbXlAdGVzdC5sb2NhbDAeFw0xMTEyMjIwNzU4NDBaFw0yMTEyMTgwNzU4 +NDBaMGExCzAJBgNVBAYTAkZJMQ4wDAYDVQQIEwVkdW1teTEOMAwGA1UEBxMFZHVt +bXkxDjAMBgNVBAoTBWR1bW15MQ4wDAYDVQQLEwVkdW1teTESMBAGA1UEAxMJbG9j +YWxob3N0MIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQDXe3FqmCWvP8XPxqtT ++0bfL1Tvzvebi46k0WIcUV8bP3vyYiSRXG9ALmyzZH4GHY9UVs4OEDkCMDOBSezB 
+0y9ai/9doTNcaictdEBu8nfdXKoTtzrn+VX4UPrkH5hm7NQ1fTQuj1MR7yBCmYqN +3Q2Q+Efuujyx0FwBzAuy1aKYuwIDAQABo4IBTTCCAUkwCQYDVR0TBAIwADARBglg +hkgBhvhCAQEEBAMCBkAwKwYJYIZIAYb4QgENBB4WHFRpbnlDQSBHZW5lcmF0ZWQg +Q2VydGlmaWNhdGUwHQYDVR0OBBYEFBvnSuVKLNPEFMAFqHw292vGHGJSMIG2BgNV +HSMEga4wgauAFBl3fyNiYkJZRft1ncdzcgS7MwotoYGHpIGEMIGBMQswCQYDVQQG +EwJGSTEOMAwGA1UECBMFZHVtbXkxDjAMBgNVBAcTBWR1bW15MQ4wDAYDVQQKEwVk +dW1teTEOMAwGA1UECxMFZHVtbXkxETAPBgNVBAMTCFNuYWtlT2lsMR8wHQYJKoZI +hvcNAQkBFhBkdW1teUB0ZXN0LmxvY2FsggkAs+uxyi/hv+MwCQYDVR0SBAIwADAZ +BgNVHREEEjAQgQ5yb290QGxvY2FsaG9zdDANBgkqhkiG9w0BAQUFAAOBgQBXdedG +XHLPmOVBeKWjTmaekcaQi44snhYqE1uXRoIQXQsyw+Ya5+n/uRxPKZO/C78EESL0 +8rnLTdZXm4GBYyHYmMy0AdWR7y030viOzAkWWRRRbuecsaUzFCI+F9jTV5LHuRzz +V8fUKwiEE9swzkWgMpfVTPFuPgzxwG9gMbrBfg== +-----END CERTIFICATE----- diff --git a/dummyserver/certs/server.key b/dummyserver/certs/server.key new file mode 100644 index 0000000..89ab057 --- /dev/null +++ b/dummyserver/certs/server.key @@ -0,0 +1,15 @@ +-----BEGIN RSA PRIVATE KEY----- +MIICXgIBAAKBgQDXe3FqmCWvP8XPxqtT+0bfL1Tvzvebi46k0WIcUV8bP3vyYiSR +XG9ALmyzZH4GHY9UVs4OEDkCMDOBSezB0y9ai/9doTNcaictdEBu8nfdXKoTtzrn ++VX4UPrkH5hm7NQ1fTQuj1MR7yBCmYqN3Q2Q+Efuujyx0FwBzAuy1aKYuwIDAQAB +AoGBANOGBM6bbhq7ImYU4qf8+RQrdVg2tc9Fzo+yTnn30sF/rx8/AiCDOV4qdGAh +HKjKKaGj2H/rotqoEFcxBy05LrgJXxydBP72e9PYhNgKOcSmCQu4yALIPEXfKuIM +zgAErHVJ2l79fif3D4hzNyz+u5E1A9n3FG9cgaJSiYP8IG2RAkEA82GZ8rBkSGQQ +ZQ3oFuzPAAL21lbj8D0p76fsCpvS7427DtZDOjhOIKZmaeykpv+qSzRraqEqjDRi +S4kjQvwh6QJBAOKniZ+NDo2lSpbOFk+XlmABK1DormVpj8KebHEZYok1lRI+WiX9 +Nnoe9YLgix7++6H5SBBCcTB4HvM+5A4BuwMCQQChcX/eZbXP81iQwB3Rfzp8xnqY +icDf7qKvz9Ma4myU7Y5E9EpaB1mD/P14jDpYcMW050vNyqTfpiwB8TFL0NZpAkEA +02jkFH9UyMgZV6qo4tqI98l/ZrtyF8OrxSNSEPhVkZf6EQc5vN9/lc8Uv1vESEgb +3AwRrKDcxRH2BHtv6qSwkwJAGjqnkIcEkA75r1e55/EF2chcZW1+tpwKupE8CtAH +VXGd5DVwt4cYWkLUj2gF2fJbV97uu2MAg5CFDb+vQ6p5eA== +-----END RSA PRIVATE KEY----- diff --git a/dummyserver/certs/server.key.org b/dummyserver/certs/server.key.org new file mode 100644 index 
0000000..709082e --- /dev/null +++ b/dummyserver/certs/server.key.org @@ -0,0 +1,12 @@ +-----BEGIN RSA PRIVATE KEY----- +Proc-Type: 4,ENCRYPTED +DEK-Info: DES-EDE3-CBC,8B3708EAD53963D4 + +uyLo4sFmSo7+K1uVgSENI+85JsG5o1JmovvxD/ucUl9CDhDj4KgFzs95r7gjjlhS +kA/hIY8Ec9i6T3zMXpAswWI5Mv2LE+UdYR5h60dYtIinLC7KF0QIztSecNWy20Bi +/NkobZhN7VZUuCEoSRWj4Ia3EuATF8Y9ZRGFPNsqMbSAhsGZ1P5xbDMEpE+5PbJP +LvdF9yWDT77rHeI4CKV4aP/yxtm1heEhKw5o6hdpPBQajPpjSQbh7/V6Qd0QsKcV +n27kPnSabsTbbc2IR40il4mZfHvXAlp4KoHL3RUgaons7q0hAUpUi+vJXbEukGGt +3dlyWwKwEFS7xBQ1pQvzcePI4/fRQxhZNxeFZW6n12Y3X61vg1IsG7usPhRe3iDP +3g1MXQMAhxaECnDN9b006IeoYdaktd4wrs/fn8x6Yz4= +-----END RSA PRIVATE KEY----- diff --git a/dummyserver/handlers.py b/dummyserver/handlers.py index bc51f31..5d6e2e6 100644 --- a/dummyserver/handlers.py +++ b/dummyserver/handlers.py @@ -70,6 +70,10 @@ class TestingApp(WSGIHandler): "Render simple message" return Response("Dummy server!") + def source_address(self, request): + """Return the requester's IP address.""" + return Response(request.remote_ip) + def set_up(self, request): test_type = request.params.get('test_type') test_id = request.params.get('test_id') diff --git a/dummyserver/handlers.pyc b/dummyserver/handlers.pyc new file mode 100644 index 0000000..ddf66d2 Binary files /dev/null and b/dummyserver/handlers.pyc differ diff --git a/dummyserver/proxy.pyc b/dummyserver/proxy.pyc new file mode 100644 index 0000000..a23689e Binary files /dev/null and b/dummyserver/proxy.pyc differ diff --git a/dummyserver/server.pyc b/dummyserver/server.pyc new file mode 100644 index 0000000..c0df815 Binary files /dev/null and b/dummyserver/server.pyc differ diff --git a/dummyserver/testcase.pyc b/dummyserver/testcase.pyc new file mode 100644 index 0000000..a1f9bdf Binary files /dev/null and b/dummyserver/testcase.pyc differ diff --git a/setup.py b/setup.py index 392b885..92fad33 100644 --- a/setup.py +++ b/setup.py @@ -44,9 +44,9 @@ setup(name='urllib3', author_email='andrey.petrov@shazow.net', 
url='http://urllib3.readthedocs.org/', license='MIT', - packages=['urllib3', 'dummyserver', + packages=['urllib3', 'urllib3.packages', 'urllib3.packages.ssl_match_hostname', - 'urllib3.contrib', + 'urllib3.contrib', 'urllib3.util', ], requires=requirements, tests_require=tests_requirements, diff --git a/test/test_poolmanager.py b/test/test_poolmanager.py index 2faab94..759b5e3 100644 --- a/test/test_poolmanager.py +++ b/test/test_poolmanager.py @@ -2,7 +2,10 @@ import unittest from urllib3.poolmanager import PoolManager from urllib3 import connection_from_url -from urllib3.exceptions import ClosedPoolError +from urllib3.exceptions import ( + ClosedPoolError, + LocationParseError, +) class TestPoolManager(unittest.TestCase): @@ -63,6 +66,9 @@ class TestPoolManager(unittest.TestCase): self.assertEqual(len(p.pools), 0) + def test_nohost(self): + p = PoolManager(5) + self.assertRaises(LocationParseError, p.connection_from_url, 'http://@') if __name__ == '__main__': diff --git a/test/test_util.py b/test/test_util.py index ebd3b5f..5dcaeab 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -77,6 +77,7 @@ class TestUtil(unittest.TestCase): for location in invalid_host: self.assertRaises(LocationParseError, get_host, location) + def test_parse_url(self): url_host_map = { 'http://google.com/mail': Url('http', host='google.com', path='/mail'), @@ -107,6 +108,7 @@ class TestUtil(unittest.TestCase): 'http://foo:bar@localhost/': Url('http', auth='foo:bar', host='localhost', path='/'), 'http://foo@localhost/': Url('http', auth='foo', host='localhost', path='/'), 'http://foo:bar@baz@localhost/': Url('http', auth='foo:bar@baz', host='localhost', path='/'), + 'http://@': Url('http', host=None, auth='') } for url, expected_url in url_host_map.items(): returned_url = parse_url(url) @@ -231,7 +233,7 @@ class TestUtil(unittest.TestCase): self.assertTrue('int or float' in str(e)) - @patch('urllib3.util.current_time') + @patch('urllib3.util.timeout.current_time') def 
test_timeout(self, current_time): timeout = Timeout(total=3) @@ -278,7 +280,7 @@ class TestUtil(unittest.TestCase): self.assertEqual(str(timeout), "Timeout(connect=1, read=None, total=3)") - @patch('urllib3.util.current_time') + @patch('urllib3.util.timeout.current_time') def test_timeout_elapsed(self, current_time): current_time.return_value = TIMEOUT_EPOCH timeout = Timeout(total=3) diff --git a/urllib3.egg-info/PKG-INFO b/urllib3.egg-info/PKG-INFO index 6a4f31a..0021e34 100644 --- a/urllib3.egg-info/PKG-INFO +++ b/urllib3.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 1.1 Name: urllib3 -Version: 1.8 +Version: 1.8.2 Summary: HTTP library with thread-safe connection pooling, file post, and more. Home-page: http://urllib3.readthedocs.org/ Author: Andrey Petrov @@ -121,6 +121,23 @@ Description: ======= Changes ======= + 1.8.2 (2014-04-17) + ++++++++++++++++++ + + * Fix ``urllib3.util`` not being included in the package. + + + 1.8.1 (2014-04-17) + ++++++++++++++++++ + + * Fix AppEngine bug of HTTPS requests going out as HTTP. (Issue #356) + + * Don't install ``dummyserver`` into ``site-packages`` as it's only needed + for the test suite. (Issue #362) + + * Added support for specifying ``source_address``. 
(Issue #352) + + 1.8 (2014-03-04) ++++++++++++++++ diff --git a/urllib3.egg-info/SOURCES.txt b/urllib3.egg-info/SOURCES.txt index a5170fb..72e3351 100644 --- a/urllib3.egg-info/SOURCES.txt +++ b/urllib3.egg-info/SOURCES.txt @@ -7,10 +7,30 @@ setup.cfg setup.py test-requirements.txt dummyserver/__init__.py +dummyserver/__init__.pyc dummyserver/handlers.py +dummyserver/handlers.pyc dummyserver/proxy.py +dummyserver/proxy.pyc dummyserver/server.py +dummyserver/server.pyc dummyserver/testcase.py +dummyserver/testcase.pyc +dummyserver/__pycache__/__init__.cpython-33.pyc +dummyserver/__pycache__/handlers.cpython-33.pyc +dummyserver/__pycache__/proxy.cpython-33.pyc +dummyserver/__pycache__/server.cpython-33.pyc +dummyserver/__pycache__/testcase.cpython-33.pyc +dummyserver/certs/cacert.key +dummyserver/certs/cacert.pem +dummyserver/certs/client.csr +dummyserver/certs/client.key +dummyserver/certs/client.pem +dummyserver/certs/client_bad.pem +dummyserver/certs/server.crt +dummyserver/certs/server.csr +dummyserver/certs/server.key +dummyserver/certs/server.key.org test/test_collections.py test/test_compatibility.py test/test_connectionpool.py @@ -31,7 +51,6 @@ urllib3/filepost.py urllib3/poolmanager.py urllib3/request.py urllib3/response.py -urllib3/util.py urllib3.egg-info/PKG-INFO urllib3.egg-info/SOURCES.txt urllib3.egg-info/dependency_links.txt @@ -43,4 +62,11 @@ urllib3/packages/__init__.py urllib3/packages/ordered_dict.py urllib3/packages/six.py urllib3/packages/ssl_match_hostname/__init__.py -urllib3/packages/ssl_match_hostname/_implementation.py \ No newline at end of file +urllib3/packages/ssl_match_hostname/_implementation.py +urllib3/util/__init__.py +urllib3/util/connection.py +urllib3/util/request.py +urllib3/util/response.py +urllib3/util/ssl_.py +urllib3/util/timeout.py +urllib3/util/url.py \ No newline at end of file diff --git a/urllib3.egg-info/top_level.txt b/urllib3.egg-info/top_level.txt index 93675d9..a42590b 100644 --- a/urllib3.egg-info/top_level.txt 
+++ b/urllib3.egg-info/top_level.txt @@ -1,2 +1 @@ urllib3 -dummyserver diff --git a/urllib3/__init__.py b/urllib3/__init__.py index 086387f..bd237a6 100644 --- a/urllib3/__init__.py +++ b/urllib3/__init__.py @@ -10,7 +10,7 @@ urllib3 - Thread-safe connection pooling and re-using. __author__ = 'Andrey Petrov (andrey.petrov@shazow.net)' __license__ = 'MIT' -__version__ = '1.8' +__version__ = '1.8.2' from .connectionpool import ( diff --git a/urllib3/connection.py b/urllib3/connection.py index 662bd2e..de7b925 100644 --- a/urllib3/connection.py +++ b/urllib3/connection.py @@ -68,15 +68,17 @@ class HTTPConnection(_HTTPConnection, object): def __init__(self, *args, **kw): if six.PY3: # Python 3 kw.pop('strict', None) - - if sys.version_info < (2, 7): # Python 2.6 and earlier + if sys.version_info < (2, 7): # Python 2.6 and older kw.pop('source_address', None) - self.source_address = None - _HTTPConnection.__init__(self, *args, **kw) + # Pre-set source_address in case we have an older Python like 2.6. + self.source_address = kw.get('source_address') + + # Superclass also sets self.source_address in Python 2.7+. + _HTTPConnection.__init__(self, *args, **kw) def _new_conn(self): - """ Establish a socket connection and set nodelay settings on it + """ Establish a socket connection and set nodelay settings on it. 
:return: a new socket connection """ @@ -85,12 +87,10 @@ class HTTPConnection(_HTTPConnection, object): extra_args.append(self.source_address) conn = socket.create_connection( - (self.host, self.port), - self.timeout, - *extra_args - ) - conn.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, - self.tcp_nodelay) + (self.host, self.port), self.timeout, *extra_args) + conn.setsockopt( + socket.IPPROTO_TCP, socket.TCP_NODELAY, self.tcp_nodelay) + return conn def _prepare_conn(self, conn): @@ -108,17 +108,18 @@ class HTTPSConnection(HTTPConnection): default_port = port_by_scheme['https'] def __init__(self, host, port=None, key_file=None, cert_file=None, - strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, - source_address=None): + strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, **kw): - HTTPConnection.__init__(self, host, port, - strict=strict, - timeout=timeout, - source_address=source_address) + HTTPConnection.__init__(self, host, port, strict=strict, + timeout=timeout, **kw) self.key_file = key_file self.cert_file = cert_file + # Required property for Google AppEngine 1.9.0 which otherwise causes + # HTTPS requests to go out as HTTP. (See Issue #356) + self._protocol = 'https' + def connect(self): conn = self._new_conn() self._prepare_conn(conn) @@ -133,6 +134,7 @@ class VerifiedHTTPSConnection(HTTPSConnection): cert_reqs = None ca_certs = None ssl_version = None + conn_kw = {} def set_cert(self, key_file=None, cert_file=None, cert_reqs=None, ca_certs=None, @@ -147,11 +149,11 @@ class VerifiedHTTPSConnection(HTTPSConnection): def connect(self): # Add certificate verification + try: sock = socket.create_connection( - address=(self.host, self.port), - timeout=self.timeout, - ) + address=(self.host, self.port), timeout=self.timeout, + **self.conn_kw) except SocketTimeout: raise ConnectTimeoutError( self, "Connection to %s timed out. 
(connect timeout=%s)" % diff --git a/urllib3/connectionpool.py b/urllib3/connectionpool.py index 6d0dbb1..95a53a7 100644 --- a/urllib3/connectionpool.py +++ b/urllib3/connectionpool.py @@ -4,6 +4,7 @@ # This module is part of urllib3 and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php +import sys import errno import logging @@ -23,6 +24,7 @@ from .exceptions import ( ConnectTimeoutError, EmptyPoolError, HostChangedError, + LocationParseError, MaxRetryError, SSLError, TimeoutError, @@ -40,7 +42,6 @@ from .connection import ( from .request import RequestMethods from .response import HTTPResponse from .util import ( - assert_fingerprint, get_host, is_connection_dropped, Timeout, @@ -65,6 +66,9 @@ class ConnectionPool(object): QueueCls = LifoQueue def __init__(self, host, port=None): + if host is None: + raise LocationParseError(host) + # httplib doesn't like it when we include brackets in ipv6 addresses host = host.strip('[]') @@ -136,7 +140,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): def __init__(self, host, port=None, strict=False, timeout=Timeout.DEFAULT_TIMEOUT, maxsize=1, block=False, - headers=None, _proxy=None, _proxy_headers=None): + headers=None, _proxy=None, _proxy_headers=None, **conn_kw): ConnectionPool.__init__(self, host, port) RequestMethods.__init__(self, headers) @@ -163,6 +167,10 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): self.num_connections = 0 self.num_requests = 0 + if sys.version_info < (2, 7): # Python 2.6 and older + conn_kw.pop('source_address', None) + self.conn_kw = conn_kw + def _new_conn(self): """ Return a fresh :class:`HTTPConnection`. 
@@ -173,7 +181,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): conn = self.ConnectionCls(host=self.host, port=self.port, timeout=self.timeout.connect_timeout, - strict=self.strict) + strict=self.strict, **self.conn_kw) if self.proxy is not None: # Enable Nagle's algorithm for proxies, to avoid packet # fragmentation. @@ -594,10 +602,14 @@ class HTTPSConnectionPool(HTTPConnectionPool): _proxy=None, _proxy_headers=None, key_file=None, cert_file=None, cert_reqs=None, ca_certs=None, ssl_version=None, - assert_hostname=None, assert_fingerprint=None): + assert_hostname=None, assert_fingerprint=None, + **conn_kw): + + if sys.version_info < (2, 7): # Python 2.6 or older + conn_kw.pop('source_address', None) HTTPConnectionPool.__init__(self, host, port, strict, timeout, maxsize, - block, headers, _proxy, _proxy_headers) + block, headers, _proxy, _proxy_headers, **conn_kw) self.key_file = key_file self.cert_file = cert_file self.cert_reqs = cert_reqs @@ -605,6 +617,7 @@ class HTTPSConnectionPool(HTTPConnectionPool): self.ssl_version = ssl_version self.assert_hostname = assert_hostname self.assert_fingerprint = assert_fingerprint + self.conn_kw = conn_kw def _prepare_conn(self, conn): """ @@ -620,6 +633,7 @@ class HTTPSConnectionPool(HTTPConnectionPool): assert_hostname=self.assert_hostname, assert_fingerprint=self.assert_fingerprint) conn.ssl_version = self.ssl_version + conn.conn_kw = self.conn_kw if self.proxy is not None: # Python 2.7+ @@ -656,6 +670,7 @@ class HTTPSConnectionPool(HTTPConnectionPool): extra_params = {} if not six.PY3: # Python 2 extra_params['strict'] = self.strict + extra_params.update(self.conn_kw) conn = self.ConnectionCls(host=actual_host, port=actual_port, timeout=self.timeout.connect_timeout, diff --git a/urllib3/contrib/pyopenssl.py b/urllib3/contrib/pyopenssl.py index 7c513f3..21a12c6 100644 --- a/urllib3/contrib/pyopenssl.py +++ b/urllib3/contrib/pyopenssl.py @@ -1,4 +1,7 @@ -'''SSL with SNI_-support for Python 2. 
+'''SSL with SNI_-support for Python 2. Follow these instructions if you would +like to verify SSL certificates in Python 2. Note, the default libraries do +*not* do certificate checking; you need to do additional work to validate +certificates yourself. This needs the following packages installed: @@ -6,9 +9,15 @@ This needs the following packages installed: * ndg-httpsclient (tested with 0.3.2) * pyasn1 (tested with 0.1.6) -To activate it call :func:`~urllib3.contrib.pyopenssl.inject_into_urllib3`. -This can be done in a ``sitecustomize`` module, or at any other time before -your application begins using ``urllib3``, like this:: +You can install them with the following command: + + pip install pyopenssl ndg-httpsclient pyasn1 + +To activate certificate checking, call +:func:`~urllib3.contrib.pyopenssl.inject_into_urllib3` from your Python code +before you begin making HTTP requests. This can be done in a ``sitecustomize`` +module, or at any other time before your application begins using ``urllib3``, +like this:: try: import urllib3.contrib.pyopenssl diff --git a/urllib3/util.py b/urllib3/util.py deleted file mode 100644 index bd26631..0000000 --- a/urllib3/util.py +++ /dev/null @@ -1,648 +0,0 @@ -# urllib3/util.py -# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) -# -# This module is part of urllib3 and is released under -# the MIT License: http://www.opensource.org/licenses/mit-license.php - - -from base64 import b64encode -from binascii import hexlify, unhexlify -from collections import namedtuple -from hashlib import md5, sha1 -from socket import error as SocketError, _GLOBAL_DEFAULT_TIMEOUT -import time - -try: - from select import poll, POLLIN -except ImportError: # `poll` doesn't exist on OSX and other platforms - poll = False - try: - from select import select - except ImportError: # `select` doesn't exist on AppEngine. 
- select = False - -try: # Test for SSL features - SSLContext = None - HAS_SNI = False - - import ssl - from ssl import wrap_socket, CERT_NONE, PROTOCOL_SSLv23 - from ssl import SSLContext # Modern SSL? - from ssl import HAS_SNI # Has SNI? -except ImportError: - pass - -from .packages import six -from .exceptions import LocationParseError, SSLError, TimeoutStateError - - -_Default = object() -# The default timeout to use for socket connections. This is the attribute used -# by httplib to define the default timeout - - -def current_time(): - """ - Retrieve the current time, this function is mocked out in unit testing. - """ - return time.time() - - -class Timeout(object): - """ - Utility object for storing timeout values. - - Example usage: - - .. code-block:: python - - timeout = urllib3.util.Timeout(connect=2.0, read=7.0) - pool = HTTPConnectionPool('www.google.com', 80, timeout=timeout) - pool.request(...) # Etc, etc - - :param connect: - The maximum amount of time to wait for a connection attempt to a server - to succeed. Omitting the parameter will default the connect timeout to - the system default, probably `the global default timeout in socket.py - `_. - None will set an infinite timeout for connection attempts. - - :type connect: integer, float, or None - - :param read: - The maximum amount of time to wait between consecutive - read operations for a response from the server. Omitting - the parameter will default the read timeout to the system - default, probably `the global default timeout in socket.py - `_. - None will set an infinite timeout. - - :type read: integer, float, or None - - :param total: - This combines the connect and read timeouts into one; the read timeout - will be set to the time leftover from the connect attempt. In the - event that both a connect timeout and a total are specified, or a read - timeout and a total are specified, the shorter timeout will be applied. - - Defaults to None. - - :type total: integer, float, or None - - .. 
note:: - - Many factors can affect the total amount of time for urllib3 to return - an HTTP response. Specifically, Python's DNS resolver does not obey the - timeout specified on the socket. Other factors that can affect total - request time include high CPU load, high swap, the program running at a - low priority level, or other behaviors. The observed running time for - urllib3 to return a response may be greater than the value passed to - `total`. - - In addition, the read and total timeouts only measure the time between - read operations on the socket connecting the client and the server, - not the total amount of time for the request to return a complete - response. For most requests, the timeout is raised because the server - has not sent the first byte in the specified time. This is not always - the case; if a server streams one byte every fifteen seconds, a timeout - of 20 seconds will not ever trigger, even though the request will - take several minutes to complete. - - If your goal is to cut off any request after a set amount of wall clock - time, consider having a second "watcher" thread to cut off a slow - request. - """ - - #: A sentinel object representing the default timeout value - DEFAULT_TIMEOUT = _GLOBAL_DEFAULT_TIMEOUT - - def __init__(self, total=None, connect=_Default, read=_Default): - self._connect = self._validate_timeout(connect, 'connect') - self._read = self._validate_timeout(read, 'read') - self.total = self._validate_timeout(total, 'total') - self._start_connect = None - - def __str__(self): - return '%s(connect=%r, read=%r, total=%r)' % ( - type(self).__name__, self._connect, self._read, self.total) - - - @classmethod - def _validate_timeout(cls, value, name): - """ Check that a timeout attribute is valid - - :param value: The timeout value to validate - :param name: The name of the timeout attribute to validate. 
This is used - for clear error messages - :return: the value - :raises ValueError: if the type is not an integer or a float, or if it - is a numeric value less than zero - """ - if value is _Default: - return cls.DEFAULT_TIMEOUT - - if value is None or value is cls.DEFAULT_TIMEOUT: - return value - - try: - float(value) - except (TypeError, ValueError): - raise ValueError("Timeout value %s was %s, but it must be an " - "int or float." % (name, value)) - - try: - if value < 0: - raise ValueError("Attempted to set %s timeout to %s, but the " - "timeout cannot be set to a value less " - "than 0." % (name, value)) - except TypeError: # Python 3 - raise ValueError("Timeout value %s was %s, but it must be an " - "int or float." % (name, value)) - - return value - - @classmethod - def from_float(cls, timeout): - """ Create a new Timeout from a legacy timeout value. - - The timeout value used by httplib.py sets the same timeout on the - connect(), and recv() socket requests. This creates a :class:`Timeout` - object that sets the individual timeouts to the ``timeout`` value passed - to this function. - - :param timeout: The legacy timeout value - :type timeout: integer, float, sentinel default object, or None - :return: a Timeout object - :rtype: :class:`Timeout` - """ - return Timeout(read=timeout, connect=timeout) - - def clone(self): - """ Create a copy of the timeout object - - Timeout properties are stored per-pool but each request needs a fresh - Timeout object to ensure each one has its own start/stop configured. - - :return: a copy of the timeout object - :rtype: :class:`Timeout` - """ - # We can't use copy.deepcopy because that will also create a new object - # for _GLOBAL_DEFAULT_TIMEOUT, which socket.py uses as a sentinel to - # detect the user default. 
- return Timeout(connect=self._connect, read=self._read, - total=self.total) - - def start_connect(self): - """ Start the timeout clock, used during a connect() attempt - - :raises urllib3.exceptions.TimeoutStateError: if you attempt - to start a timer that has been started already. - """ - if self._start_connect is not None: - raise TimeoutStateError("Timeout timer has already been started.") - self._start_connect = current_time() - return self._start_connect - - def get_connect_duration(self): - """ Gets the time elapsed since the call to :meth:`start_connect`. - - :return: the elapsed time - :rtype: float - :raises urllib3.exceptions.TimeoutStateError: if you attempt - to get duration for a timer that hasn't been started. - """ - if self._start_connect is None: - raise TimeoutStateError("Can't get connect duration for timer " - "that has not started.") - return current_time() - self._start_connect - - @property - def connect_timeout(self): - """ Get the value to use when setting a connection timeout. - - This will be a positive float or integer, the value None - (never timeout), or the default system timeout. - - :return: the connect timeout - :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None - """ - if self.total is None: - return self._connect - - if self._connect is None or self._connect is self.DEFAULT_TIMEOUT: - return self.total - - return min(self._connect, self.total) - - @property - def read_timeout(self): - """ Get the value for the read timeout. - - This assumes some time has elapsed in the connection timeout and - computes the read timeout appropriately. - - If self.total is set, the read timeout is dependent on the amount of - time taken by the connect timeout. If the connection time has not been - established, a :exc:`~urllib3.exceptions.TimeoutStateError` will be - raised. 
- - :return: the value to use for the read timeout - :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None - :raises urllib3.exceptions.TimeoutStateError: If :meth:`start_connect` - has not yet been called on this object. - """ - if (self.total is not None and - self.total is not self.DEFAULT_TIMEOUT and - self._read is not None and - self._read is not self.DEFAULT_TIMEOUT): - # in case the connect timeout has not yet been established. - if self._start_connect is None: - return self._read - return max(0, min(self.total - self.get_connect_duration(), - self._read)) - elif self.total is not None and self.total is not self.DEFAULT_TIMEOUT: - return max(0, self.total - self.get_connect_duration()) - else: - return self._read - - -class Url(namedtuple('Url', ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment'])): - """ - Datastructure for representing an HTTP URL. Used as a return value for - :func:`parse_url`. - """ - slots = () - - def __new__(cls, scheme=None, auth=None, host=None, port=None, path=None, query=None, fragment=None): - return super(Url, cls).__new__(cls, scheme, auth, host, port, path, query, fragment) - - @property - def hostname(self): - """For backwards-compatibility with urlparse. We're nice like that.""" - return self.host - - @property - def request_uri(self): - """Absolute path including the query string.""" - uri = self.path or '/' - - if self.query is not None: - uri += '?' + self.query - - return uri - - @property - def netloc(self): - """Network location including host and port""" - if self.port: - return '%s:%d' % (self.host, self.port) - return self.host - - -def split_first(s, delims): - """ - Given a string and an iterable of delimiters, split on the first found - delimiter. Return two split parts and the matched delimiter. - - If not found, then the first part is the full input string. 
- - Example: :: - - >>> split_first('foo/bar?baz', '?/=') - ('foo', 'bar?baz', '/') - >>> split_first('foo/bar?baz', '123') - ('foo/bar?baz', '', None) - - Scales linearly with number of delims. Not ideal for large number of delims. - """ - min_idx = None - min_delim = None - for d in delims: - idx = s.find(d) - if idx < 0: - continue - - if min_idx is None or idx < min_idx: - min_idx = idx - min_delim = d - - if min_idx is None or min_idx < 0: - return s, '', None - - return s[:min_idx], s[min_idx+1:], min_delim - - -def parse_url(url): - """ - Given a url, return a parsed :class:`.Url` namedtuple. Best-effort is - performed to parse incomplete urls. Fields not provided will be None. - - Partly backwards-compatible with :mod:`urlparse`. - - Example: :: - - >>> parse_url('http://google.com/mail/') - Url(scheme='http', host='google.com', port=None, path='/', ...) - >>> parse_url('google.com:80') - Url(scheme=None, host='google.com', port=80, path=None, ...) - >>> parse_url('/foo?bar') - Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...) - """ - - # While this code has overlap with stdlib's urlparse, it is much - # simplified for our needs and less annoying. - # Additionally, this implementations does silly things to be optimal - # on CPython. - - scheme = None - auth = None - host = None - port = None - path = None - fragment = None - query = None - - # Scheme - if '://' in url: - scheme, url = url.split('://', 1) - - # Find the earliest Authority Terminator - # (http://tools.ietf.org/html/rfc3986#section-3.2) - url, path_, delim = split_first(url, ['/', '?', '#']) - - if delim: - # Reassemble the path - path = delim + path_ - - # Auth - if '@' in url: - # Last '@' denotes end of auth part - auth, url = url.rsplit('@', 1) - - # IPv6 - if url and url[0] == '[': - host, url = url.split(']', 1) - host += ']' - - # Port - if ':' in url: - _host, port = url.split(':', 1) - - if not host: - host = _host - - if port: - # If given, ports must be integers. 
- if not port.isdigit(): - raise LocationParseError("Failed to parse: %s" % url) - port = int(port) - else: - # Blank ports are cool, too. (rfc3986#section-3.2.3) - port = None - - elif not host and url: - host = url - - if not path: - return Url(scheme, auth, host, port, path, query, fragment) - - # Fragment - if '#' in path: - path, fragment = path.split('#', 1) - - # Query - if '?' in path: - path, query = path.split('?', 1) - - return Url(scheme, auth, host, port, path, query, fragment) - - -def get_host(url): - """ - Deprecated. Use :func:`.parse_url` instead. - """ - p = parse_url(url) - return p.scheme or 'http', p.hostname, p.port - - -def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, - basic_auth=None, proxy_basic_auth=None): - """ - Shortcuts for generating request headers. - - :param keep_alive: - If ``True``, adds 'connection: keep-alive' header. - - :param accept_encoding: - Can be a boolean, list, or string. - ``True`` translates to 'gzip,deflate'. - List will get joined by comma. - String will be used as provided. - - :param user_agent: - String representing the user-agent you want, such as - "python-urllib3/0.6" - - :param basic_auth: - Colon-separated username:password string for 'authorization: basic ...' - auth header. - - :param proxy_basic_auth: - Colon-separated username:password string for 'proxy-authorization: basic ...' - auth header. 
- - Example: :: - - >>> make_headers(keep_alive=True, user_agent="Batman/1.0") - {'connection': 'keep-alive', 'user-agent': 'Batman/1.0'} - >>> make_headers(accept_encoding=True) - {'accept-encoding': 'gzip,deflate'} - """ - headers = {} - if accept_encoding: - if isinstance(accept_encoding, str): - pass - elif isinstance(accept_encoding, list): - accept_encoding = ','.join(accept_encoding) - else: - accept_encoding = 'gzip,deflate' - headers['accept-encoding'] = accept_encoding - - if user_agent: - headers['user-agent'] = user_agent - - if keep_alive: - headers['connection'] = 'keep-alive' - - if basic_auth: - headers['authorization'] = 'Basic ' + \ - b64encode(six.b(basic_auth)).decode('utf-8') - - if proxy_basic_auth: - headers['proxy-authorization'] = 'Basic ' + \ - b64encode(six.b(proxy_basic_auth)).decode('utf-8') - - return headers - - -def is_connection_dropped(conn): # Platform-specific - """ - Returns True if the connection is dropped and should be closed. - - :param conn: - :class:`httplib.HTTPConnection` object. - - Note: For platforms like AppEngine, this will always return ``False`` to - let the platform handle connection recycling transparently for us. - """ - sock = getattr(conn, 'sock', False) - if not sock: # Platform-specific: AppEngine - return False - - if not poll: - if not select: # Platform-specific: AppEngine - return False - - try: - return select([sock], [], [], 0.0)[0] - except SocketError: - return True - - # This version is better on platforms that support it. - p = poll() - p.register(sock, POLLIN) - for (fno, ev) in p.poll(0.0): - if fno == sock.fileno(): - # Either data is buffered (bad), or the connection is dropped. - return True - - -def resolve_cert_reqs(candidate): - """ - Resolves the argument to a numeric constant, which can be passed to - the wrap_socket function/method from the ssl module. - Defaults to :data:`ssl.CERT_NONE`. 
- If given a string it is assumed to be the name of the constant in the - :mod:`ssl` module or its abbrevation. - (So you can specify `REQUIRED` instead of `CERT_REQUIRED`. - If it's neither `None` nor a string we assume it is already the numeric - constant which can directly be passed to wrap_socket. - """ - if candidate is None: - return CERT_NONE - - if isinstance(candidate, str): - res = getattr(ssl, candidate, None) - if res is None: - res = getattr(ssl, 'CERT_' + candidate) - return res - - return candidate - - -def resolve_ssl_version(candidate): - """ - like resolve_cert_reqs - """ - if candidate is None: - return PROTOCOL_SSLv23 - - if isinstance(candidate, str): - res = getattr(ssl, candidate, None) - if res is None: - res = getattr(ssl, 'PROTOCOL_' + candidate) - return res - - return candidate - - -def assert_fingerprint(cert, fingerprint): - """ - Checks if given fingerprint matches the supplied certificate. - - :param cert: - Certificate as bytes object. - :param fingerprint: - Fingerprint as string of hexdigits, can be interspersed by colons. - """ - - # Maps the length of a digest to a possible hash function producing - # this digest. - hashfunc_map = { - 16: md5, - 20: sha1 - } - - fingerprint = fingerprint.replace(':', '').lower() - - digest_length, rest = divmod(len(fingerprint), 2) - - if rest or digest_length not in hashfunc_map: - raise SSLError('Fingerprint is of invalid length.') - - # We need encode() here for py32; works on py2 and p33. - fingerprint_bytes = unhexlify(fingerprint.encode()) - - hashfunc = hashfunc_map[digest_length] - - cert_digest = hashfunc(cert).digest() - - if not cert_digest == fingerprint_bytes: - raise SSLError('Fingerprints did not match. Expected "{0}", got "{1}".' - .format(hexlify(fingerprint_bytes), - hexlify(cert_digest))) - -def is_fp_closed(obj): - """ - Checks whether a given file-like object is closed. - - :param obj: - The file-like object to check. 
- """ - if hasattr(obj, 'fp'): - # Object is a container for another file-like object that gets released - # on exhaustion (e.g. HTTPResponse) - return obj.fp is None - - return obj.closed - - -if SSLContext is not None: # Python 3.2+ - def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, - ca_certs=None, server_hostname=None, - ssl_version=None): - """ - All arguments except `server_hostname` have the same meaning as for - :func:`ssl.wrap_socket` - - :param server_hostname: - Hostname of the expected certificate - """ - context = SSLContext(ssl_version) - context.verify_mode = cert_reqs - - # Disable TLS compression to migitate CRIME attack (issue #309) - OP_NO_COMPRESSION = 0x20000 - context.options |= OP_NO_COMPRESSION - - if ca_certs: - try: - context.load_verify_locations(ca_certs) - # Py32 raises IOError - # Py33 raises FileNotFoundError - except Exception as e: # Reraise as SSLError - raise SSLError(e) - if certfile: - # FIXME: This block needs a test. - context.load_cert_chain(certfile, keyfile) - if HAS_SNI: # Platform-specific: OpenSSL with enabled SNI - return context.wrap_socket(sock, server_hostname=server_hostname) - return context.wrap_socket(sock) - -else: # Python 3.1 and earlier - def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, - ca_certs=None, server_hostname=None, - ssl_version=None): - return wrap_socket(sock, keyfile=keyfile, certfile=certfile, - ca_certs=ca_certs, cert_reqs=cert_reqs, - ssl_version=ssl_version) diff --git a/urllib3/util/__init__.py b/urllib3/util/__init__.py new file mode 100644 index 0000000..a40185e --- /dev/null +++ b/urllib3/util/__init__.py @@ -0,0 +1,27 @@ +# urllib3/util/__init__.py +# Copyright 2008-2014 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# +# This module is part of urllib3 and is released under +# the MIT License: http://www.opensource.org/licenses/mit-license.php + +from .connection import is_connection_dropped +from .request import make_headers +from 
.response import is_fp_closed +from .ssl_ import ( + SSLContext, + HAS_SNI, + assert_fingerprint, + resolve_cert_reqs, + resolve_ssl_version, + ssl_wrap_socket, +) +from .timeout import ( + current_time, + Timeout, +) +from .url import ( + get_host, + parse_url, + split_first, + Url, +) diff --git a/urllib3/util/connection.py b/urllib3/util/connection.py new file mode 100644 index 0000000..8deeab5 --- /dev/null +++ b/urllib3/util/connection.py @@ -0,0 +1,45 @@ +from socket import error as SocketError +try: + from select import poll, POLLIN +except ImportError: # `poll` doesn't exist on OSX and other platforms + poll = False + try: + from select import select + except ImportError: # `select` doesn't exist on AppEngine. + select = False + +def is_connection_dropped(conn): # Platform-specific + """ + Returns True if the connection is dropped and should be closed. + + :param conn: + :class:`httplib.HTTPConnection` object. + + Note: For platforms like AppEngine, this will always return ``False`` to + let the platform handle connection recycling transparently for us. + """ + sock = getattr(conn, 'sock', False) + if sock is False: # Platform-specific: AppEngine + return False + if sock is None: # Connection already closed (such as by httplib). + return False + + if not poll: + if not select: # Platform-specific: AppEngine + return False + + try: + return select([sock], [], [], 0.0)[0] + except SocketError: + return True + + # This version is better on platforms that support it. + p = poll() + p.register(sock, POLLIN) + for (fno, ev) in p.poll(0.0): + if fno == sock.fileno(): + # Either data is buffered (bad), or the connection is dropped. 
+ return True + + + diff --git a/urllib3/util/request.py b/urllib3/util/request.py new file mode 100644 index 0000000..d48d651 --- /dev/null +++ b/urllib3/util/request.py @@ -0,0 +1,68 @@ +from base64 import b64encode + +from ..packages import six + + +ACCEPT_ENCODING = 'gzip,deflate' + + +def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, + basic_auth=None, proxy_basic_auth=None): + """ + Shortcuts for generating request headers. + + :param keep_alive: + If ``True``, adds 'connection: keep-alive' header. + + :param accept_encoding: + Can be a boolean, list, or string. + ``True`` translates to 'gzip,deflate'. + List will get joined by comma. + String will be used as provided. + + :param user_agent: + String representing the user-agent you want, such as + "python-urllib3/0.6" + + :param basic_auth: + Colon-separated username:password string for 'authorization: basic ...' + auth header. + + :param proxy_basic_auth: + Colon-separated username:password string for 'proxy-authorization: basic ...' + auth header. 
+ + Example: :: + + >>> make_headers(keep_alive=True, user_agent="Batman/1.0") + {'connection': 'keep-alive', 'user-agent': 'Batman/1.0'} + >>> make_headers(accept_encoding=True) + {'accept-encoding': 'gzip,deflate'} + """ + headers = {} + if accept_encoding: + if isinstance(accept_encoding, str): + pass + elif isinstance(accept_encoding, list): + accept_encoding = ','.join(accept_encoding) + else: + accept_encoding = ACCEPT_ENCODING + headers['accept-encoding'] = accept_encoding + + if user_agent: + headers['user-agent'] = user_agent + + if keep_alive: + headers['connection'] = 'keep-alive' + + if basic_auth: + headers['authorization'] = 'Basic ' + \ + b64encode(six.b(basic_auth)).decode('utf-8') + + if proxy_basic_auth: + headers['proxy-authorization'] = 'Basic ' + \ + b64encode(six.b(proxy_basic_auth)).decode('utf-8') + + return headers + + diff --git a/urllib3/util/response.py b/urllib3/util/response.py new file mode 100644 index 0000000..d0325bc --- /dev/null +++ b/urllib3/util/response.py @@ -0,0 +1,13 @@ +def is_fp_closed(obj): + """ + Checks whether a given file-like object is closed. + + :param obj: + The file-like object to check. + """ + if hasattr(obj, 'fp'): + # Object is a container for another file-like object that gets released + # on exhaustion (e.g. HTTPResponse) + return obj.fp is None + + return obj.closed diff --git a/urllib3/util/ssl_.py b/urllib3/util/ssl_.py new file mode 100644 index 0000000..dee4b87 --- /dev/null +++ b/urllib3/util/ssl_.py @@ -0,0 +1,133 @@ +from binascii import hexlify, unhexlify +from hashlib import md5, sha1 + +from ..exceptions import SSLError + + +try: # Test for SSL features + SSLContext = None + HAS_SNI = False + + import ssl + from ssl import wrap_socket, CERT_NONE, PROTOCOL_SSLv23 + from ssl import SSLContext # Modern SSL? + from ssl import HAS_SNI # Has SNI? +except ImportError: + pass + + +def assert_fingerprint(cert, fingerprint): + """ + Checks if given fingerprint matches the supplied certificate. 
+ + :param cert: + Certificate as bytes object. + :param fingerprint: + Fingerprint as string of hexdigits, can be interspersed by colons. + """ + + # Maps the length of a digest to a possible hash function producing + # this digest. + hashfunc_map = { + 16: md5, + 20: sha1 + } + + fingerprint = fingerprint.replace(':', '').lower() + + digest_length, rest = divmod(len(fingerprint), 2) + + if rest or digest_length not in hashfunc_map: + raise SSLError('Fingerprint is of invalid length.') + + # We need encode() here for py32; works on py2 and p33. + fingerprint_bytes = unhexlify(fingerprint.encode()) + + hashfunc = hashfunc_map[digest_length] + + cert_digest = hashfunc(cert).digest() + + if not cert_digest == fingerprint_bytes: + raise SSLError('Fingerprints did not match. Expected "{0}", got "{1}".' + .format(hexlify(fingerprint_bytes), + hexlify(cert_digest))) + + +def resolve_cert_reqs(candidate): + """ + Resolves the argument to a numeric constant, which can be passed to + the wrap_socket function/method from the ssl module. + Defaults to :data:`ssl.CERT_NONE`. + If given a string it is assumed to be the name of the constant in the + :mod:`ssl` module or its abbrevation. + (So you can specify `REQUIRED` instead of `CERT_REQUIRED`. + If it's neither `None` nor a string we assume it is already the numeric + constant which can directly be passed to wrap_socket. 
+ """ + if candidate is None: + return CERT_NONE + + if isinstance(candidate, str): + res = getattr(ssl, candidate, None) + if res is None: + res = getattr(ssl, 'CERT_' + candidate) + return res + + return candidate + + +def resolve_ssl_version(candidate): + """ + like resolve_cert_reqs + """ + if candidate is None: + return PROTOCOL_SSLv23 + + if isinstance(candidate, str): + res = getattr(ssl, candidate, None) + if res is None: + res = getattr(ssl, 'PROTOCOL_' + candidate) + return res + + return candidate + + +if SSLContext is not None: # Python 3.2+ + def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, + ca_certs=None, server_hostname=None, + ssl_version=None): + """ + All arguments except `server_hostname` have the same meaning as for + :func:`ssl.wrap_socket` + + :param server_hostname: + Hostname of the expected certificate + """ + context = SSLContext(ssl_version) + context.verify_mode = cert_reqs + + # Disable TLS compression to migitate CRIME attack (issue #309) + OP_NO_COMPRESSION = 0x20000 + context.options |= OP_NO_COMPRESSION + + if ca_certs: + try: + context.load_verify_locations(ca_certs) + # Py32 raises IOError + # Py33 raises FileNotFoundError + except Exception as e: # Reraise as SSLError + raise SSLError(e) + if certfile: + # FIXME: This block needs a test. 
+ context.load_cert_chain(certfile, keyfile) + if HAS_SNI: # Platform-specific: OpenSSL with enabled SNI + return context.wrap_socket(sock, server_hostname=server_hostname) + return context.wrap_socket(sock) + +else: # Python 3.1 and earlier + def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, + ca_certs=None, server_hostname=None, + ssl_version=None): + return wrap_socket(sock, keyfile=keyfile, certfile=certfile, + ca_certs=ca_certs, cert_reqs=cert_reqs, + ssl_version=ssl_version) diff --git a/urllib3/util/timeout.py b/urllib3/util/timeout.py new file mode 100644 index 0000000..4f947cb --- /dev/null +++ b/urllib3/util/timeout.py @@ -0,0 +1,234 @@ +from socket import _GLOBAL_DEFAULT_TIMEOUT +import time + +from ..exceptions import TimeoutStateError + + +def current_time(): + """ + Retrieve the current time, this function is mocked out in unit testing. + """ + return time.time() + + +_Default = object() +# The default timeout to use for socket connections. This is the attribute used +# by httplib to define the default timeout + + +class Timeout(object): + """ + Utility object for storing timeout values. + + Example usage: + + .. code-block:: python + + timeout = urllib3.util.Timeout(connect=2.0, read=7.0) + pool = HTTPConnectionPool('www.google.com', 80, timeout=timeout) + pool.request(...) # Etc, etc + + :param connect: + The maximum amount of time to wait for a connection attempt to a server + to succeed. Omitting the parameter will default the connect timeout to + the system default, probably `the global default timeout in socket.py + `_. + None will set an infinite timeout for connection attempts. + + :type connect: integer, float, or None + + :param read: + The maximum amount of time to wait between consecutive + read operations for a response from the server. Omitting + the parameter will default the read timeout to the system + default, probably `the global default timeout in socket.py + `_. + None will set an infinite timeout. 
+ + :type read: integer, float, or None + + :param total: + This combines the connect and read timeouts into one; the read timeout + will be set to the time leftover from the connect attempt. In the + event that both a connect timeout and a total are specified, or a read + timeout and a total are specified, the shorter timeout will be applied. + + Defaults to None. + + :type total: integer, float, or None + + .. note:: + + Many factors can affect the total amount of time for urllib3 to return + an HTTP response. Specifically, Python's DNS resolver does not obey the + timeout specified on the socket. Other factors that can affect total + request time include high CPU load, high swap, the program running at a + low priority level, or other behaviors. The observed running time for + urllib3 to return a response may be greater than the value passed to + `total`. + + In addition, the read and total timeouts only measure the time between + read operations on the socket connecting the client and the server, + not the total amount of time for the request to return a complete + response. For most requests, the timeout is raised because the server + has not sent the first byte in the specified time. This is not always + the case; if a server streams one byte every fifteen seconds, a timeout + of 20 seconds will not ever trigger, even though the request will + take several minutes to complete. + + If your goal is to cut off any request after a set amount of wall clock + time, consider having a second "watcher" thread to cut off a slow + request. 
+ """ + + #: A sentinel object representing the default timeout value + DEFAULT_TIMEOUT = _GLOBAL_DEFAULT_TIMEOUT + + def __init__(self, total=None, connect=_Default, read=_Default): + self._connect = self._validate_timeout(connect, 'connect') + self._read = self._validate_timeout(read, 'read') + self.total = self._validate_timeout(total, 'total') + self._start_connect = None + + def __str__(self): + return '%s(connect=%r, read=%r, total=%r)' % ( + type(self).__name__, self._connect, self._read, self.total) + + + @classmethod + def _validate_timeout(cls, value, name): + """ Check that a timeout attribute is valid + + :param value: The timeout value to validate + :param name: The name of the timeout attribute to validate. This is used + for clear error messages + :return: the value + :raises ValueError: if the type is not an integer or a float, or if it + is a numeric value less than zero + """ + if value is _Default: + return cls.DEFAULT_TIMEOUT + + if value is None or value is cls.DEFAULT_TIMEOUT: + return value + + try: + float(value) + except (TypeError, ValueError): + raise ValueError("Timeout value %s was %s, but it must be an " + "int or float." % (name, value)) + + try: + if value < 0: + raise ValueError("Attempted to set %s timeout to %s, but the " + "timeout cannot be set to a value less " + "than 0." % (name, value)) + except TypeError: # Python 3 + raise ValueError("Timeout value %s was %s, but it must be an " + "int or float." % (name, value)) + + return value + + @classmethod + def from_float(cls, timeout): + """ Create a new Timeout from a legacy timeout value. + + The timeout value used by httplib.py sets the same timeout on the + connect(), and recv() socket requests. This creates a :class:`Timeout` + object that sets the individual timeouts to the ``timeout`` value passed + to this function. 
+ + :param timeout: The legacy timeout value + :type timeout: integer, float, sentinel default object, or None + :return: a Timeout object + :rtype: :class:`Timeout` + """ + return Timeout(read=timeout, connect=timeout) + + def clone(self): + """ Create a copy of the timeout object + + Timeout properties are stored per-pool but each request needs a fresh + Timeout object to ensure each one has its own start/stop configured. + + :return: a copy of the timeout object + :rtype: :class:`Timeout` + """ + # We can't use copy.deepcopy because that will also create a new object + # for _GLOBAL_DEFAULT_TIMEOUT, which socket.py uses as a sentinel to + # detect the user default. + return Timeout(connect=self._connect, read=self._read, + total=self.total) + + def start_connect(self): + """ Start the timeout clock, used during a connect() attempt + + :raises urllib3.exceptions.TimeoutStateError: if you attempt + to start a timer that has been started already. + """ + if self._start_connect is not None: + raise TimeoutStateError("Timeout timer has already been started.") + self._start_connect = current_time() + return self._start_connect + + def get_connect_duration(self): + """ Gets the time elapsed since the call to :meth:`start_connect`. + + :return: the elapsed time + :rtype: float + :raises urllib3.exceptions.TimeoutStateError: if you attempt + to get duration for a timer that hasn't been started. + """ + if self._start_connect is None: + raise TimeoutStateError("Can't get connect duration for timer " + "that has not started.") + return current_time() - self._start_connect + + @property + def connect_timeout(self): + """ Get the value to use when setting a connection timeout. + + This will be a positive float or integer, the value None + (never timeout), or the default system timeout. 
+ + :return: the connect timeout + :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None + """ + if self.total is None: + return self._connect + + if self._connect is None or self._connect is self.DEFAULT_TIMEOUT: + return self.total + + return min(self._connect, self.total) + + @property + def read_timeout(self): + """ Get the value for the read timeout. + + This assumes some time has elapsed in the connection timeout and + computes the read timeout appropriately. + + If self.total is set, the read timeout is dependent on the amount of + time taken by the connect timeout. If the connection time has not been + established, a :exc:`~urllib3.exceptions.TimeoutStateError` will be + raised. + + :return: the value to use for the read timeout + :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None + :raises urllib3.exceptions.TimeoutStateError: If :meth:`start_connect` + has not yet been called on this object. + """ + if (self.total is not None and + self.total is not self.DEFAULT_TIMEOUT and + self._read is not None and + self._read is not self.DEFAULT_TIMEOUT): + # in case the connect timeout has not yet been established. + if self._start_connect is None: + return self._read + return max(0, min(self.total - self.get_connect_duration(), + self._read)) + elif self.total is not None and self.total is not self.DEFAULT_TIMEOUT: + return max(0, self.total - self.get_connect_duration()) + else: + return self._read diff --git a/urllib3/util/url.py b/urllib3/util/url.py new file mode 100644 index 0000000..362d216 --- /dev/null +++ b/urllib3/util/url.py @@ -0,0 +1,162 @@ +from collections import namedtuple + +from ..exceptions import LocationParseError + + +class Url(namedtuple('Url', ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment'])): + """ + Datastructure for representing an HTTP URL. Used as a return value for + :func:`parse_url`. 
+ """ + slots = () + + def __new__(cls, scheme=None, auth=None, host=None, port=None, path=None, query=None, fragment=None): + return super(Url, cls).__new__(cls, scheme, auth, host, port, path, query, fragment) + + @property + def hostname(self): + """For backwards-compatibility with urlparse. We're nice like that.""" + return self.host + + @property + def request_uri(self): + """Absolute path including the query string.""" + uri = self.path or '/' + + if self.query is not None: + uri += '?' + self.query + + return uri + + @property + def netloc(self): + """Network location including host and port""" + if self.port: + return '%s:%d' % (self.host, self.port) + return self.host + + +def split_first(s, delims): + """ + Given a string and an iterable of delimiters, split on the first found + delimiter. Return two split parts and the matched delimiter. + + If not found, then the first part is the full input string. + + Example: :: + + >>> split_first('foo/bar?baz', '?/=') + ('foo', 'bar?baz', '/') + >>> split_first('foo/bar?baz', '123') + ('foo/bar?baz', '', None) + + Scales linearly with number of delims. Not ideal for large number of delims. + """ + min_idx = None + min_delim = None + for d in delims: + idx = s.find(d) + if idx < 0: + continue + + if min_idx is None or idx < min_idx: + min_idx = idx + min_delim = d + + if min_idx is None or min_idx < 0: + return s, '', None + + return s[:min_idx], s[min_idx+1:], min_delim + + +def parse_url(url): + """ + Given a url, return a parsed :class:`.Url` namedtuple. Best-effort is + performed to parse incomplete urls. Fields not provided will be None. + + Partly backwards-compatible with :mod:`urlparse`. + + Example: :: + + >>> parse_url('http://google.com/mail/') + Url(scheme='http', host='google.com', port=None, path='/', ...) + >>> parse_url('google.com:80') + Url(scheme=None, host='google.com', port=80, path=None, ...) + >>> parse_url('/foo?bar') + Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...) 
+ """ + + # While this code has overlap with stdlib's urlparse, it is much + # simplified for our needs and less annoying. + # Additionally, this implementations does silly things to be optimal + # on CPython. + + scheme = None + auth = None + host = None + port = None + path = None + fragment = None + query = None + + # Scheme + if '://' in url: + scheme, url = url.split('://', 1) + + # Find the earliest Authority Terminator + # (http://tools.ietf.org/html/rfc3986#section-3.2) + url, path_, delim = split_first(url, ['/', '?', '#']) + + if delim: + # Reassemble the path + path = delim + path_ + + # Auth + if '@' in url: + # Last '@' denotes end of auth part + auth, url = url.rsplit('@', 1) + + # IPv6 + if url and url[0] == '[': + host, url = url.split(']', 1) + host += ']' + + # Port + if ':' in url: + _host, port = url.split(':', 1) + + if not host: + host = _host + + if port: + # If given, ports must be integers. + if not port.isdigit(): + raise LocationParseError(url) + port = int(port) + else: + # Blank ports are cool, too. (rfc3986#section-3.2.3) + port = None + + elif not host and url: + host = url + + if not path: + return Url(scheme, auth, host, port, path, query, fragment) + + # Fragment + if '#' in path: + path, fragment = path.split('#', 1) + + # Query + if '?' in path: + path, query = path.split('?', 1) + + return Url(scheme, auth, host, port, path, query, fragment) + + +def get_host(url): + """ + Deprecated. Use :func:`.parse_url` instead. 
+ """ + p = parse_url(url) + return p.scheme or 'http', p.hostname, p.port -- cgit v1.2.3 From 73be7d6cc85a90ab4f67ffc27dc7eae672f7741f Mon Sep 17 00:00:00 2001 From: SVN-Git Migration Date: Thu, 8 Oct 2015 13:19:38 -0700 Subject: Imported Upstream version 1.8.3 --- CHANGES.rst | 22 +++++ MANIFEST.in | 1 + PKG-INFO | 34 ++++++- README.rst | 10 +- dummyserver/__init__.pyc | Bin 141 -> 0 bytes dummyserver/__pycache__/__init__.cpython-33.pyc | Bin 149 -> 0 bytes dummyserver/__pycache__/handlers.cpython-33.pyc | Bin 10341 -> 0 bytes dummyserver/__pycache__/proxy.cpython-33.pyc | Bin 5809 -> 0 bytes dummyserver/__pycache__/server.cpython-33.pyc | Bin 6928 -> 0 bytes dummyserver/__pycache__/testcase.cpython-33.pyc | Bin 7203 -> 0 bytes dummyserver/handlers.pyc | Bin 8967 -> 0 bytes dummyserver/proxy.pyc | Bin 4740 -> 0 bytes dummyserver/server.pyc | Bin 5992 -> 0 bytes dummyserver/testcase.pyc | Bin 5090 -> 0 bytes setup.cfg | 3 + test/test_util.py | 13 +++ urllib3.egg-info/PKG-INFO | 34 ++++++- urllib3.egg-info/SOURCES.txt | 10 -- urllib3/__init__.py | 2 +- urllib3/_collections.py | 2 +- urllib3/connection.py | 119 +++++++++++++++--------- urllib3/connectionpool.py | 83 ++++++++--------- urllib3/fields.py | 22 +++-- urllib3/filepost.py | 9 +- urllib3/poolmanager.py | 2 +- urllib3/request.py | 36 +++---- urllib3/response.py | 55 +++++------ urllib3/util/connection.py | 6 +- urllib3/util/request.py | 14 ++- urllib3/util/timeout.py | 31 +++--- urllib3/util/url.py | 10 +- 31 files changed, 328 insertions(+), 190 deletions(-) delete mode 100644 dummyserver/__init__.pyc delete mode 100644 dummyserver/__pycache__/__init__.cpython-33.pyc delete mode 100644 dummyserver/__pycache__/handlers.cpython-33.pyc delete mode 100644 dummyserver/__pycache__/proxy.cpython-33.pyc delete mode 100644 dummyserver/__pycache__/server.cpython-33.pyc delete mode 100644 dummyserver/__pycache__/testcase.cpython-33.pyc delete mode 100644 dummyserver/handlers.pyc delete mode 100644 
dummyserver/proxy.pyc delete mode 100644 dummyserver/server.pyc delete mode 100644 dummyserver/testcase.pyc diff --git a/CHANGES.rst b/CHANGES.rst index 3f836e9..4d90ce2 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,28 @@ Changes ======= +1.8.3 (2014-06-23) +++++++++++++++++++ + +* Fix TLS verification when using a proxy in Python 3.4.1. (Issue #385) + +* Add ``disable_cache`` option to ``urllib3.util.make_headers``. (Issue #393) + +* Wrap ``socket.timeout`` exception with + ``urllib3.exceptions.ReadTimeoutError``. (Issue #399) + +* Fixed proxy-related bug where connections were being reused incorrectly. + (Issues #366, #369) + +* Added ``socket_options`` keyword parameter which allows to define + ``setsockopt`` configuration of new sockets. (Issue #397) + +* Removed ``HTTPConnection.tcp_nodelay`` in favor of + ``HTTPConnection.default_socket_options``. (Issue #397) + +* Fixed ``TypeError`` bug in Python 2.6.4. (Issue #411) + + 1.8.2 (2014-04-17) ++++++++++++++++++ diff --git a/MANIFEST.in b/MANIFEST.in index 3f344d1..3c2189a 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,2 +1,3 @@ include README.rst CHANGES.rst LICENSE.txt CONTRIBUTORS.txt test-requirements.txt recursive-include dummyserver *.* +prune *.pyc diff --git a/PKG-INFO b/PKG-INFO index 0021e34..8e4fc2f 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 1.1 Name: urllib3 -Version: 1.8.2 +Version: 1.8.3 Summary: HTTP library with thread-safe connection pooling, file post, and more. Home-page: http://urllib3.readthedocs.org/ Author: Andrey Petrov @@ -28,7 +28,14 @@ Description: ======= - Tested on Python 2.6+ and Python 3.2+, 100% unit test coverage. - Small and easy to understand codebase perfect for extending and building upon. For a more comprehensive solution, have a look at - `Requests `_ which is also powered by urllib3. + `Requests `_ which is also powered by ``urllib3``. + + You might already be using urllib3! 
+ =================================== + + ``urllib3`` powers `many great Python libraries `_, + including ``pip`` and ``requests``. + What's wrong with urllib and urllib2? ===================================== @@ -99,6 +106,7 @@ Description: ======= py27: commands succeeded py32: commands succeeded py33: commands succeeded + py34: commands succeeded Note that code coverage less than 100% is regarded as a failing run. @@ -121,6 +129,28 @@ Description: ======= Changes ======= + 1.8.3 (2014-06-23) + ++++++++++++++++++ + + * Fix TLS verification when using a proxy in Python 3.4.1. (Issue #385) + + * Add ``disable_cache`` option to ``urllib3.util.make_headers``. (Issue #393) + + * Wrap ``socket.timeout`` exception with + ``urllib3.exceptions.ReadTimeoutError``. (Issue #399) + + * Fixed proxy-related bug where connections were being reused incorrectly. + (Issues #366, #369) + + * Added ``socket_options`` keyword parameter which allows to define + ``setsockopt`` configuration of new sockets. (Issue #397) + + * Removed ``HTTPConnection.tcp_nodelay`` in favor of + ``HTTPConnection.default_socket_options``. (Issue #397) + + * Fixed ``TypeError`` bug in Python 2.6.4. (Issue #411) + + 1.8.2 (2014-04-17) ++++++++++++++++++ diff --git a/README.rst b/README.rst index b126647..e76c261 100644 --- a/README.rst +++ b/README.rst @@ -20,7 +20,14 @@ Highlights - Tested on Python 2.6+ and Python 3.2+, 100% unit test coverage. - Small and easy to understand codebase perfect for extending and building upon. For a more comprehensive solution, have a look at - `Requests `_ which is also powered by urllib3. + `Requests `_ which is also powered by ``urllib3``. + +You might already be using urllib3! +=================================== + +``urllib3`` powers `many great Python libraries `_, +including ``pip`` and ``requests``. + What's wrong with urllib and urllib2? ===================================== @@ -91,6 +98,7 @@ analysis while running test suite. 
Easiest way to run the tests is thusly the py27: commands succeeded py32: commands succeeded py33: commands succeeded + py34: commands succeeded Note that code coverage less than 100% is regarded as a failing run. diff --git a/dummyserver/__init__.pyc b/dummyserver/__init__.pyc deleted file mode 100644 index b017ac5..0000000 Binary files a/dummyserver/__init__.pyc and /dev/null differ diff --git a/dummyserver/__pycache__/__init__.cpython-33.pyc b/dummyserver/__pycache__/__init__.cpython-33.pyc deleted file mode 100644 index d1e84e3..0000000 Binary files a/dummyserver/__pycache__/__init__.cpython-33.pyc and /dev/null differ diff --git a/dummyserver/__pycache__/handlers.cpython-33.pyc b/dummyserver/__pycache__/handlers.cpython-33.pyc deleted file mode 100644 index e3bab97..0000000 Binary files a/dummyserver/__pycache__/handlers.cpython-33.pyc and /dev/null differ diff --git a/dummyserver/__pycache__/proxy.cpython-33.pyc b/dummyserver/__pycache__/proxy.cpython-33.pyc deleted file mode 100644 index 4cca456..0000000 Binary files a/dummyserver/__pycache__/proxy.cpython-33.pyc and /dev/null differ diff --git a/dummyserver/__pycache__/server.cpython-33.pyc b/dummyserver/__pycache__/server.cpython-33.pyc deleted file mode 100644 index 49504c9..0000000 Binary files a/dummyserver/__pycache__/server.cpython-33.pyc and /dev/null differ diff --git a/dummyserver/__pycache__/testcase.cpython-33.pyc b/dummyserver/__pycache__/testcase.cpython-33.pyc deleted file mode 100644 index 21e3d10..0000000 Binary files a/dummyserver/__pycache__/testcase.cpython-33.pyc and /dev/null differ diff --git a/dummyserver/handlers.pyc b/dummyserver/handlers.pyc deleted file mode 100644 index ddf66d2..0000000 Binary files a/dummyserver/handlers.pyc and /dev/null differ diff --git a/dummyserver/proxy.pyc b/dummyserver/proxy.pyc deleted file mode 100644 index a23689e..0000000 Binary files a/dummyserver/proxy.pyc and /dev/null differ diff --git a/dummyserver/server.pyc b/dummyserver/server.pyc deleted 
file mode 100644 index c0df815..0000000 Binary files a/dummyserver/server.pyc and /dev/null differ diff --git a/dummyserver/testcase.pyc b/dummyserver/testcase.pyc deleted file mode 100644 index a1f9bdf..0000000 Binary files a/dummyserver/testcase.pyc and /dev/null differ diff --git a/setup.cfg b/setup.cfg index 8f1fee7..b140696 100644 --- a/setup.cfg +++ b/setup.cfg @@ -5,6 +5,9 @@ cover-package = urllib3 cover-min-percentage = 100 cover-erase = true +[flake8] +max-line-length = 99 + [egg_info] tag_build = tag_date = 0 diff --git a/test/test_util.py b/test/test_util.py index 5dcaeab..944d90f 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -1,5 +1,6 @@ import logging import unittest +import ssl from mock import patch @@ -11,6 +12,7 @@ from urllib3.util import ( parse_url, Timeout, Url, + resolve_cert_reqs, ) from urllib3.exceptions import LocationParseError, TimeoutStateError @@ -177,6 +179,10 @@ class TestUtil(unittest.TestCase): make_headers(proxy_basic_auth='foo:bar'), {'proxy-authorization': 'Basic Zm9vOmJhcg=='}) + self.assertEqual( + make_headers(disable_cache=True), + {'cache-control': 'no-cache'}) + def test_split_first(self): test_cases = { ('abcd', 'b'): ('a', 'cd', 'b'), @@ -294,4 +300,11 @@ class TestUtil(unittest.TestCase): current_time.return_value = TIMEOUT_EPOCH + 37 self.assertEqual(timeout.get_connect_duration(), 37) + def test_resolve_cert_reqs(self): + self.assertEqual(resolve_cert_reqs(None), ssl.CERT_NONE) + self.assertEqual(resolve_cert_reqs(ssl.CERT_NONE), ssl.CERT_NONE) + + self.assertEqual(resolve_cert_reqs(ssl.CERT_REQUIRED), ssl.CERT_REQUIRED) + self.assertEqual(resolve_cert_reqs('REQUIRED'), ssl.CERT_REQUIRED) + self.assertEqual(resolve_cert_reqs('CERT_REQUIRED'), ssl.CERT_REQUIRED) diff --git a/urllib3.egg-info/PKG-INFO b/urllib3.egg-info/PKG-INFO index 0021e34..8e4fc2f 100644 --- a/urllib3.egg-info/PKG-INFO +++ b/urllib3.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 1.1 Name: urllib3 -Version: 1.8.2 +Version: 1.8.3 
Summary: HTTP library with thread-safe connection pooling, file post, and more. Home-page: http://urllib3.readthedocs.org/ Author: Andrey Petrov @@ -28,7 +28,14 @@ Description: ======= - Tested on Python 2.6+ and Python 3.2+, 100% unit test coverage. - Small and easy to understand codebase perfect for extending and building upon. For a more comprehensive solution, have a look at - `Requests `_ which is also powered by urllib3. + `Requests `_ which is also powered by ``urllib3``. + + You might already be using urllib3! + =================================== + + ``urllib3`` powers `many great Python libraries `_, + including ``pip`` and ``requests``. + What's wrong with urllib and urllib2? ===================================== @@ -99,6 +106,7 @@ Description: ======= py27: commands succeeded py32: commands succeeded py33: commands succeeded + py34: commands succeeded Note that code coverage less than 100% is regarded as a failing run. @@ -121,6 +129,28 @@ Description: ======= Changes ======= + 1.8.3 (2014-06-23) + ++++++++++++++++++ + + * Fix TLS verification when using a proxy in Python 3.4.1. (Issue #385) + + * Add ``disable_cache`` option to ``urllib3.util.make_headers``. (Issue #393) + + * Wrap ``socket.timeout`` exception with + ``urllib3.exceptions.ReadTimeoutError``. (Issue #399) + + * Fixed proxy-related bug where connections were being reused incorrectly. + (Issues #366, #369) + + * Added ``socket_options`` keyword parameter which allows to define + ``setsockopt`` configuration of new sockets. (Issue #397) + + * Removed ``HTTPConnection.tcp_nodelay`` in favor of + ``HTTPConnection.default_socket_options``. (Issue #397) + + * Fixed ``TypeError`` bug in Python 2.6.4. 
(Issue #411) + + 1.8.2 (2014-04-17) ++++++++++++++++++ diff --git a/urllib3.egg-info/SOURCES.txt b/urllib3.egg-info/SOURCES.txt index 72e3351..fb93e5b 100644 --- a/urllib3.egg-info/SOURCES.txt +++ b/urllib3.egg-info/SOURCES.txt @@ -7,20 +7,10 @@ setup.cfg setup.py test-requirements.txt dummyserver/__init__.py -dummyserver/__init__.pyc dummyserver/handlers.py -dummyserver/handlers.pyc dummyserver/proxy.py -dummyserver/proxy.pyc dummyserver/server.py -dummyserver/server.pyc dummyserver/testcase.py -dummyserver/testcase.pyc -dummyserver/__pycache__/__init__.cpython-33.pyc -dummyserver/__pycache__/handlers.cpython-33.pyc -dummyserver/__pycache__/proxy.cpython-33.pyc -dummyserver/__pycache__/server.cpython-33.pyc -dummyserver/__pycache__/testcase.cpython-33.pyc dummyserver/certs/cacert.key dummyserver/certs/cacert.pem dummyserver/certs/client.csr diff --git a/urllib3/__init__.py b/urllib3/__init__.py index bd237a6..c80d5da 100644 --- a/urllib3/__init__.py +++ b/urllib3/__init__.py @@ -10,7 +10,7 @@ urllib3 - Thread-safe connection pooling and re-using. __author__ = 'Andrey Petrov (andrey.petrov@shazow.net)' __license__ = 'MIT' -__version__ = '1.8.2' +__version__ = '1.8.3' from .connectionpool import ( diff --git a/urllib3/_collections.py b/urllib3/_collections.py index 9cea3a4..ccf0d5f 100644 --- a/urllib3/_collections.py +++ b/urllib3/_collections.py @@ -116,7 +116,7 @@ class HTTPHeaderDict(MutableMapping): A ``dict`` like container for storing HTTP Headers. Field names are stored and compared case-insensitively in compliance with - RFC 2616. Iteration provides the first case-sensitive key seen for each + RFC 7230. Iteration provides the first case-sensitive key seen for each case-insensitive pair. 
Using ``__setitem__`` syntax overwrites fields that compare equal diff --git a/urllib3/connection.py b/urllib3/connection.py index de7b925..fbb63ed 100644 --- a/urllib3/connection.py +++ b/urllib3/connection.py @@ -8,32 +8,27 @@ import sys import socket from socket import timeout as SocketTimeout -try: # Python 3 +try: # Python 3 from http.client import HTTPConnection as _HTTPConnection, HTTPException except ImportError: from httplib import HTTPConnection as _HTTPConnection, HTTPException + class DummyConnection(object): "Used to detect a failed ConnectionCls import." pass -try: # Compiled with SSL? - ssl = None + +try: # Compiled with SSL? HTTPSConnection = DummyConnection + import ssl + BaseSSLError = ssl.SSLError +except (ImportError, AttributeError): # Platform-specific: No SSL. + ssl = None class BaseSSLError(BaseException): pass - try: # Python 3 - from http.client import HTTPSConnection as _HTTPSConnection - except ImportError: - from httplib import HTTPSConnection as _HTTPSConnection - - import ssl - BaseSSLError = ssl.SSLError - -except (ImportError, AttributeError): # Platform-specific: No SSL. - pass from .exceptions import ( ConnectTimeoutError, @@ -58,12 +53,34 @@ class HTTPConnection(_HTTPConnection, object): """ Based on httplib.HTTPConnection but provides an extra constructor backwards-compatibility layer between older and newer Pythons. + + Additional keyword parameters are used to configure attributes of the connection. + Accepted parameters include: + + - ``strict``: See the documentation on :class:`urllib3.connectionpool.HTTPConnectionPool` + - ``source_address``: Set the source address for the current connection. + + .. note:: This is ignored for Python 2.6. It is only applied for 2.7 and 3.x + + - ``socket_options``: Set specific options on the underlying socket. 
If not specified, then + defaults are loaded from ``HTTPConnection.default_socket_options`` which includes disabling + Nagle's algorithm (sets TCP_NODELAY to 1) unless the connection is behind a proxy. + + For example, if you wish to enable TCP Keep Alive in addition to the defaults, + you might pass:: + + HTTPConnection.default_socket_options + [ + (socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1), + ] + + Or you may want to disable the defaults by passing an empty list (e.g., ``[]``). """ default_port = port_by_scheme['http'] - # By default, disable Nagle's Algorithm. - tcp_nodelay = 1 + #: Disable Nagle's algorithm by default. + #: ``[(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)]`` + default_socket_options = [(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)] def __init__(self, *args, **kw): if six.PY3: # Python 3 @@ -74,30 +91,54 @@ class HTTPConnection(_HTTPConnection, object): # Pre-set source_address in case we have an older Python like 2.6. self.source_address = kw.get('source_address') + #: The socket options provided by the user. If no options are + #: provided, we use the default options. + self.socket_options = kw.pop('socket_options', self.default_socket_options) + # Superclass also sets self.source_address in Python 2.7+. - _HTTPConnection.__init__(self, *args, **kw) + _HTTPConnection.__init__(self, *args, **kw) def _new_conn(self): """ Establish a socket connection and set nodelay settings on it. - :return: a new socket connection + :return: New socket connection. """ extra_args = [] if self.source_address: # Python 2.7+ extra_args.append(self.source_address) - conn = socket.create_connection( - (self.host, self.port), self.timeout, *extra_args) - conn.setsockopt( - socket.IPPROTO_TCP, socket.TCP_NODELAY, self.tcp_nodelay) + try: + conn = socket.create_connection( + (self.host, self.port), self.timeout, *extra_args) + + except SocketTimeout: + raise ConnectTimeoutError( + self, "Connection to %s timed out. 
(connect timeout=%s)" % + (self.host, self.timeout)) + + # Set options on the socket. + self._set_options_on(conn) return conn def _prepare_conn(self, conn): self.sock = conn - if self._tunnel_host: + # the _tunnel_host attribute was added in python 2.6.3 (via + # http://hg.python.org/cpython/rev/0f57b30a152f) so pythons 2.6(0-2) do + # not have them. + if getattr(self, '_tunnel_host', None): # TODO: Fix tunnel so it doesn't depend on self.sock state. self._tunnel() + # Mark this connection as not reusable + self.auto_open = 0 + + def _set_options_on(self, conn): + # Disable all socket options if the user passes ``socket_options=None`` + if self.socket_options is None: + return + + for opt in self.socket_options: + conn.setsockopt(*opt) def connect(self): conn = self._new_conn() @@ -134,7 +175,6 @@ class VerifiedHTTPSConnection(HTTPSConnection): cert_reqs = None ca_certs = None ssl_version = None - conn_kw = {} def set_cert(self, key_file=None, cert_file=None, cert_reqs=None, ca_certs=None, @@ -149,37 +189,32 @@ class VerifiedHTTPSConnection(HTTPSConnection): def connect(self): # Add certificate verification - - try: - sock = socket.create_connection( - address=(self.host, self.port), timeout=self.timeout, - **self.conn_kw) - except SocketTimeout: - raise ConnectTimeoutError( - self, "Connection to %s timed out. (connect timeout=%s)" % - (self.host, self.timeout)) - - sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, - self.tcp_nodelay) + conn = self._new_conn() resolved_cert_reqs = resolve_cert_reqs(self.cert_reqs) resolved_ssl_version = resolve_ssl_version(self.ssl_version) - # the _tunnel_host attribute was added in python 2.6.3 (via - # http://hg.python.org/cpython/rev/0f57b30a152f) so pythons 2.6(0-2) do - # not have them. 
+ hostname = self.host if getattr(self, '_tunnel_host', None): - self.sock = sock + # _tunnel_host was added in Python 2.6.3 + # (See: http://hg.python.org/cpython/rev/0f57b30a152f) + + self.sock = conn # Calls self._set_hostport(), so self.host is # self._tunnel_host below. self._tunnel() + # Mark this connection as not reusable + self.auto_open = 0 + + # Override the host with the one we're requesting data from. + hostname = self._tunnel_host # Wrap socket using verification with the root certs in # trusted_root_certs - self.sock = ssl_wrap_socket(sock, self.key_file, self.cert_file, + self.sock = ssl_wrap_socket(conn, self.key_file, self.cert_file, cert_reqs=resolved_cert_reqs, ca_certs=self.ca_certs, - server_hostname=self.host, + server_hostname=hostname, ssl_version=resolved_ssl_version) if resolved_cert_reqs != ssl.CERT_NONE: @@ -188,7 +223,7 @@ class VerifiedHTTPSConnection(HTTPSConnection): self.assert_fingerprint) elif self.assert_hostname is not False: match_hostname(self.sock.getpeercert(), - self.assert_hostname or self.host) + self.assert_hostname or hostname) if ssl: diff --git a/urllib3/connectionpool.py b/urllib3/connectionpool.py index 95a53a7..ab205fa 100644 --- a/urllib3/connectionpool.py +++ b/urllib3/connectionpool.py @@ -11,7 +11,7 @@ import logging from socket import error as SocketError, timeout as SocketTimeout import socket -try: # Python 3 +try: # Python 3 from queue import LifoQueue, Empty, Full except ImportError: from Queue import LifoQueue, Empty, Full @@ -21,7 +21,6 @@ except ImportError: from .exceptions import ( ClosedPoolError, ConnectionError, - ConnectTimeoutError, EmptyPoolError, HostChangedError, LocationParseError, @@ -54,8 +53,8 @@ log = logging.getLogger(__name__) _Default = object() -## Pool objects +## Pool objects class ConnectionPool(object): """ Base class for all connection pools, such as @@ -82,6 +81,7 @@ class ConnectionPool(object): # This is taken from 
http://hg.python.org/cpython/file/7aaba721ebc0/Lib/socket.py#l252 _blocking_errnos = set([errno.EAGAIN, errno.EWOULDBLOCK]) + class HTTPConnectionPool(ConnectionPool, RequestMethods): """ Thread-safe connection pool for one host. @@ -133,6 +133,10 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): :param _proxy_headers: A dictionary with proxy headers, should not be used directly, instead, see :class:`urllib3.connectionpool.ProxyManager`" + + :param \**conn_kw: + Additional parameters are used to create fresh :class:`urllib3.connection.HTTPConnection`, + :class:`urllib3.connection.HTTPSConnection` instances. """ scheme = 'http' @@ -166,11 +170,14 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): # These are mostly for testing and debugging purposes. self.num_connections = 0 self.num_requests = 0 - - if sys.version_info < (2, 7): # Python 2.6 and older - conn_kw.pop('source_address', None) self.conn_kw = conn_kw + if self.proxy: + # Enable Nagle's algorithm for proxies, to avoid packet fragmentation. + # We cannot know if the user has added default socket options, so we cannot replace the + # list. + self.conn_kw.setdefault('socket_options', []) + def _new_conn(self): """ Return a fresh :class:`HTTPConnection`. @@ -182,10 +189,6 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): conn = self.ConnectionCls(host=self.host, port=self.port, timeout=self.timeout.connect_timeout, strict=self.strict, **self.conn_kw) - if self.proxy is not None: - # Enable Nagle's algorithm for proxies, to avoid packet - # fragmentation. 
- conn.tcp_nodelay = 0 return conn def _get_conn(self, timeout=None): @@ -204,7 +207,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): try: conn = self.pool.get(block=self.block, timeout=timeout) - except AttributeError: # self.pool is None + except AttributeError: # self.pool is None raise ClosedPoolError(self, "Pool is closed.") except Empty: @@ -218,6 +221,11 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): if conn and is_connection_dropped(conn): log.info("Resetting dropped connection: %s" % self.host) conn.close() + if getattr(conn, 'auto_open', 1) == 0: + # This is a proxied connection that has been mutated by + # httplib._tunnel() and cannot be reused (since it would + # attempt to bypass the proxy) + conn = None return conn or self._new_conn() @@ -237,7 +245,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): """ try: self.pool.put(conn, block=False) - return # Everything is dandy, done. + return # Everything is dandy, done. except AttributeError: # self.pool is None. pass @@ -283,16 +291,11 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): timeout_obj = self._get_timeout(timeout) - try: - timeout_obj.start_connect() - conn.timeout = timeout_obj.connect_timeout - # conn.request() calls httplib.*.request, not the method in - # urllib3.request. It also calls makefile (recv) on the socket. - conn.request(method, url, **httplib_request_kw) - except SocketTimeout: - raise ConnectTimeoutError( - self, "Connection to %s timed out. (connect timeout=%s)" % - (self.host, timeout_obj.connect_timeout)) + timeout_obj.start_connect() + conn.timeout = timeout_obj.connect_timeout + # conn.request() calls httplib.*.request, not the method in + # urllib3.request. It also calls makefile (recv) on the socket. 
+ conn.request(method, url, **httplib_request_kw) # Reset the timeout for the recv() on the socket read_timeout = timeout_obj.read_timeout @@ -310,14 +313,14 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): "Read timed out. (read timeout=%s)" % read_timeout) if read_timeout is Timeout.DEFAULT_TIMEOUT: conn.sock.settimeout(socket.getdefaulttimeout()) - else: # None or a value + else: # None or a value conn.sock.settimeout(read_timeout) # Receive the response from the server try: - try: # Python 2.7+, use buffering of HTTP responses + try: # Python 2.7+, use buffering of HTTP responses httplib_response = conn.getresponse(buffering=True) - except TypeError: # Python 2.6 and older + except TypeError: # Python 2.6 and older httplib_response = conn.getresponse() except SocketTimeout: raise ReadTimeoutError( @@ -333,7 +336,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): raise - except SocketError as e: # Platform-specific: Python 2 + except SocketError as e: # Platform-specific: Python 2 # See the above comment about EAGAIN in Python 3. In Python 2 we # have to specifically catch it and throw the timeout error if e.errno in _blocking_errnos: @@ -364,7 +367,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): conn.close() except Empty: - pass # Done. + pass # Done. 
def is_same_host(self, url): """ @@ -605,11 +608,9 @@ class HTTPSConnectionPool(HTTPConnectionPool): assert_hostname=None, assert_fingerprint=None, **conn_kw): - if sys.version_info < (2, 7): # Python 2.6 or older - conn_kw.pop('source_address', None) - HTTPConnectionPool.__init__(self, host, port, strict, timeout, maxsize, - block, headers, _proxy, _proxy_headers, **conn_kw) + block, headers, _proxy, _proxy_headers, + **conn_kw) self.key_file = key_file self.cert_file = cert_file self.cert_reqs = cert_reqs @@ -617,7 +618,6 @@ class HTTPSConnectionPool(HTTPConnectionPool): self.ssl_version = ssl_version self.assert_hostname = assert_hostname self.assert_fingerprint = assert_fingerprint - self.conn_kw = conn_kw def _prepare_conn(self, conn): """ @@ -633,7 +633,6 @@ class HTTPSConnectionPool(HTTPConnectionPool): assert_hostname=self.assert_hostname, assert_fingerprint=self.assert_fingerprint) conn.ssl_version = self.ssl_version - conn.conn_kw = self.conn_kw if self.proxy is not None: # Python 2.7+ @@ -641,7 +640,12 @@ class HTTPSConnectionPool(HTTPConnectionPool): set_tunnel = conn.set_tunnel except AttributeError: # Platform-specific: Python 2.6 set_tunnel = conn._set_tunnel - set_tunnel(self.host, self.port, self.proxy_headers) + + if sys.version_info <= (2, 6, 4) and not self.proxy_headers: # Python 2.6.4 and older + set_tunnel(self.host, self.port) + else: + set_tunnel(self.host, self.port, self.proxy_headers) + # Establish tunnel connection early, because otherwise httplib # would improperly set Host: header to proxy's IP:port. 
conn.connect() @@ -667,18 +671,9 @@ class HTTPSConnectionPool(HTTPConnectionPool): actual_host = self.proxy.host actual_port = self.proxy.port - extra_params = {} - if not six.PY3: # Python 2 - extra_params['strict'] = self.strict - extra_params.update(self.conn_kw) - conn = self.ConnectionCls(host=actual_host, port=actual_port, timeout=self.timeout.connect_timeout, - **extra_params) - if self.proxy is not None: - # Enable Nagle's algorithm for proxies, to avoid packet - # fragmentation. - conn.tcp_nodelay = 0 + strict=self.strict, **self.conn_kw) return self._prepare_conn(conn) diff --git a/urllib3/fields.py b/urllib3/fields.py index ed01765..dceafb4 100644 --- a/urllib3/fields.py +++ b/urllib3/fields.py @@ -15,7 +15,7 @@ def guess_content_type(filename, default='application/octet-stream'): Guess the "Content-Type" of a file. :param filename: - The filename to guess the "Content-Type" of using :mod:`mimetimes`. + The filename to guess the "Content-Type" of using :mod:`mimetypes`. :param default: If no "Content-Type" can be guessed, default to `default`. """ @@ -78,9 +78,10 @@ class RequestField(object): """ A :class:`~urllib3.fields.RequestField` factory from old-style tuple parameters. - Supports constructing :class:`~urllib3.fields.RequestField` from parameter - of key/value strings AND key/filetuple. A filetuple is a (filename, data, MIME type) - tuple where the MIME type is optional. For example: :: + Supports constructing :class:`~urllib3.fields.RequestField` from + parameter of key/value strings AND key/filetuple. A filetuple is a + (filename, data, MIME type) tuple where the MIME type is optional. + For example: :: 'foo': 'bar', 'fakefile': ('foofile.txt', 'contents of foofile'), @@ -125,8 +126,8 @@ class RequestField(object): 'Content-Disposition' fields. :param header_parts: - A sequence of (k, v) typles or a :class:`dict` of (k, v) to format as - `k1="v1"; k2="v2"; ...`. 
+ A sequence of (k, v) typles or a :class:`dict` of (k, v) to format + as `k1="v1"; k2="v2"; ...`. """ parts = [] iterable = header_parts @@ -158,7 +159,8 @@ class RequestField(object): lines.append('\r\n') return '\r\n'.join(lines) - def make_multipart(self, content_disposition=None, content_type=None, content_location=None): + def make_multipart(self, content_disposition=None, content_type=None, + content_location=None): """ Makes this request field into a multipart request field. @@ -172,6 +174,10 @@ class RequestField(object): """ self.headers['Content-Disposition'] = content_disposition or 'form-data' - self.headers['Content-Disposition'] += '; '.join(['', self._render_parts((('name', self._name), ('filename', self._filename)))]) + self.headers['Content-Disposition'] += '; '.join([ + '', self._render_parts( + (('name', self._name), ('filename', self._filename)) + ) + ]) self.headers['Content-Type'] = content_type self.headers['Content-Location'] = content_location diff --git a/urllib3/filepost.py b/urllib3/filepost.py index e8b30bd..c3db30c 100644 --- a/urllib3/filepost.py +++ b/urllib3/filepost.py @@ -5,7 +5,6 @@ # the MIT License: http://www.opensource.org/licenses/mit-license.php import codecs -import mimetypes from uuid import uuid4 from io import BytesIO @@ -38,10 +37,10 @@ def iter_field_objects(fields): i = iter(fields) for field in i: - if isinstance(field, RequestField): - yield field - else: - yield RequestField.from_tuples(*field) + if isinstance(field, RequestField): + yield field + else: + yield RequestField.from_tuples(*field) def iter_fields(fields): diff --git a/urllib3/poolmanager.py b/urllib3/poolmanager.py index f18ff2b..3945f5d 100644 --- a/urllib3/poolmanager.py +++ b/urllib3/poolmanager.py @@ -161,7 +161,7 @@ class PoolManager(RequestMethods): # Support relative URLs for redirecting. 
redirect_location = urljoin(url, redirect_location) - # RFC 2616, Section 10.3.4 + # RFC 7231, Section 6.4.4 if response.status == 303: method = 'GET' diff --git a/urllib3/request.py b/urllib3/request.py index 2a92cc2..7a46f1b 100644 --- a/urllib3/request.py +++ b/urllib3/request.py @@ -26,8 +26,8 @@ class RequestMethods(object): Specifically, - :meth:`.request_encode_url` is for sending requests whose fields are encoded - in the URL (such as GET, HEAD, DELETE). + :meth:`.request_encode_url` is for sending requests whose fields are + encoded in the URL (such as GET, HEAD, DELETE). :meth:`.request_encode_body` is for sending requests whose fields are encoded in the *body* of the request using multipart or www-form-urlencoded @@ -51,7 +51,7 @@ class RequestMethods(object): def urlopen(self, method, url, body=None, headers=None, encode_multipart=True, multipart_boundary=None, - **kw): # Abstract + **kw): # Abstract raise NotImplemented("Classes extending RequestMethods must implement " "their own ``urlopen`` method.") @@ -61,8 +61,8 @@ class RequestMethods(object): ``fields`` based on the ``method`` used. This is a convenience method that requires the least amount of manual - effort. It can be used in most situations, while still having the option - to drop down to more specific methods when necessary, such as + effort. It can be used in most situations, while still having the + option to drop down to more specific methods when necessary, such as :meth:`request_encode_url`, :meth:`request_encode_body`, or even the lowest level :meth:`urlopen`. 
""" @@ -70,12 +70,12 @@ class RequestMethods(object): if method in self._encode_url_methods: return self.request_encode_url(method, url, fields=fields, - headers=headers, - **urlopen_kw) + headers=headers, + **urlopen_kw) else: return self.request_encode_body(method, url, fields=fields, - headers=headers, - **urlopen_kw) + headers=headers, + **urlopen_kw) def request_encode_url(self, method, url, fields=None, **urlopen_kw): """ @@ -94,14 +94,14 @@ class RequestMethods(object): the body. This is useful for request methods like POST, PUT, PATCH, etc. When ``encode_multipart=True`` (default), then - :meth:`urllib3.filepost.encode_multipart_formdata` is used to encode the - payload with the appropriate content type. Otherwise + :meth:`urllib3.filepost.encode_multipart_formdata` is used to encode + the payload with the appropriate content type. Otherwise :meth:`urllib.urlencode` is used with the 'application/x-www-form-urlencoded' content type. Multipart encoding must be used when posting files, and it's reasonably - safe to use it in other times too. However, it may break request signing, - such as with OAuth. + safe to use it in other times too. However, it may break request + signing, such as with OAuth. Supports an optional ``fields`` parameter of key/value strings AND key/filetuple. A filetuple is a (filename, data, MIME type) tuple where @@ -119,17 +119,17 @@ class RequestMethods(object): When uploading a file, providing a filename (the first parameter of the tuple) is optional but recommended to best mimick behavior of browsers. - Note that if ``headers`` are supplied, the 'Content-Type' header will be - overwritten because it depends on the dynamic random boundary string + Note that if ``headers`` are supplied, the 'Content-Type' header will + be overwritten because it depends on the dynamic random boundary string which is used to compose the body of the request. The random boundary string can be explicitly set with the ``multipart_boundary`` parameter. 
""" if encode_multipart: - body, content_type = encode_multipart_formdata(fields or {}, - boundary=multipart_boundary) + body, content_type = encode_multipart_formdata( + fields or {}, boundary=multipart_boundary) else: body, content_type = (urlencode(fields or {}), - 'application/x-www-form-urlencoded') + 'application/x-www-form-urlencoded') if headers is None: headers = self.headers diff --git a/urllib3/response.py b/urllib3/response.py index db44182..13ffba4 100644 --- a/urllib3/response.py +++ b/urllib3/response.py @@ -5,19 +5,16 @@ # the MIT License: http://www.opensource.org/licenses/mit-license.php -import logging import zlib import io +from socket import timeout as SocketTimeout from ._collections import HTTPHeaderDict -from .exceptions import DecodeError +from .exceptions import DecodeError, ReadTimeoutError from .packages.six import string_types as basestring, binary_type from .util import is_fp_closed -log = logging.getLogger(__name__) - - class DeflateDecoder(object): def __init__(self): @@ -163,8 +160,8 @@ class HTTPResponse(io.IOBase): after having ``.read()`` the file object. (Overridden if ``amt`` is set.) """ - # Note: content-encoding value should be case-insensitive, per RFC 2616 - # Section 3.5 + # Note: content-encoding value should be case-insensitive, per RFC 7230 + # Section 3.2 content_encoding = self.headers.get('content-encoding', '').lower() if self._decoder is None: if content_encoding in self.CONTENT_DECODERS: @@ -178,23 +175,29 @@ class HTTPResponse(io.IOBase): flush_decoder = False try: - if amt is None: - # cStringIO doesn't like amt=None - data = self._fp.read() - flush_decoder = True - else: - cache_content = False - data = self._fp.read(amt) - if amt != 0 and not data: # Platform-specific: Buggy versions of Python. - # Close the connection when no data is returned - # - # This is redundant to what httplib/http.client _should_ - # already do. 
However, versions of python released before - # December 15, 2012 (http://bugs.python.org/issue16298) do not - # properly close the connection in all cases. There is no harm - # in redundantly calling close. - self._fp.close() + try: + if amt is None: + # cStringIO doesn't like amt=None + data = self._fp.read() flush_decoder = True + else: + cache_content = False + data = self._fp.read(amt) + if amt != 0 and not data: # Platform-specific: Buggy versions of Python. + # Close the connection when no data is returned + # + # This is redundant to what httplib/http.client _should_ + # already do. However, versions of python released before + # December 15, 2012 (http://bugs.python.org/issue16298) do + # not properly close the connection in all cases. There is + # no harm in redundantly calling close. + self._fp.close() + flush_decoder = True + + except SocketTimeout: + # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but + # there is yet no clean way to get at it from this context. + raise ReadTimeoutError(self._pool, None, 'Read timed out.') self._fp_bytes_read += len(data) @@ -204,8 +207,7 @@ class HTTPResponse(io.IOBase): except (IOError, zlib.error) as e: raise DecodeError( "Received response with content-encoding: %s, but " - "failed to decode it." % content_encoding, - e) + "failed to decode it." 
% content_encoding, e) if flush_decoder and decode_content and self._decoder: buf = self._decoder.decompress(binary_type()) @@ -242,7 +244,6 @@ class HTTPResponse(io.IOBase): if data: yield data - @classmethod def from_httplib(ResponseCls, r, **response_kw): """ @@ -297,7 +298,7 @@ class HTTPResponse(io.IOBase): elif hasattr(self._fp, "fileno"): return self._fp.fileno() else: - raise IOError("The file-like object this HTTPResponse is wrapped " + raise IOError("The file-like object this HTTPResponse is wrapped " "around has no file descriptor") def flush(self): diff --git a/urllib3/util/connection.py b/urllib3/util/connection.py index 8deeab5..c67ef04 100644 --- a/urllib3/util/connection.py +++ b/urllib3/util/connection.py @@ -8,6 +8,7 @@ except ImportError: # `poll` doesn't exist on OSX and other platforms except ImportError: # `select` doesn't exist on AppEngine. select = False + def is_connection_dropped(conn): # Platform-specific """ Returns True if the connection is dropped and should be closed. @@ -22,7 +23,7 @@ def is_connection_dropped(conn): # Platform-specific if sock is False: # Platform-specific: AppEngine return False if sock is None: # Connection already closed (such as by httplib). - return False + return True if not poll: if not select: # Platform-specific: AppEngine @@ -40,6 +41,3 @@ def is_connection_dropped(conn): # Platform-specific if fno == sock.fileno(): # Either data is buffered (bad), or the connection is dropped. return True - - - diff --git a/urllib3/util/request.py b/urllib3/util/request.py index d48d651..bfd7a98 100644 --- a/urllib3/util/request.py +++ b/urllib3/util/request.py @@ -7,7 +7,7 @@ ACCEPT_ENCODING = 'gzip,deflate' def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, - basic_auth=None, proxy_basic_auth=None): + basic_auth=None, proxy_basic_auth=None, disable_cache=None): """ Shortcuts for generating request headers. 
@@ -29,8 +29,11 @@ def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, auth header. :param proxy_basic_auth: - Colon-separated username:password string for 'proxy-authorization: basic ...' - auth header. + Colon-separated username:password string for + 'proxy-authorization: basic ...' auth header. + + :param disable_cache: + If ``True``, adds 'cache-control: no-cache' header. Example: :: @@ -63,6 +66,7 @@ def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, headers['proxy-authorization'] = 'Basic ' + \ b64encode(six.b(proxy_basic_auth)).decode('utf-8') - return headers - + if disable_cache: + headers['cache-control'] = 'no-cache' + return headers diff --git a/urllib3/util/timeout.py b/urllib3/util/timeout.py index 4f947cb..aaadc12 100644 --- a/urllib3/util/timeout.py +++ b/urllib3/util/timeout.py @@ -94,17 +94,16 @@ class Timeout(object): return '%s(connect=%r, read=%r, total=%r)' % ( type(self).__name__, self._connect, self._read, self.total) - @classmethod def _validate_timeout(cls, value, name): - """ Check that a timeout attribute is valid + """ Check that a timeout attribute is valid. :param value: The timeout value to validate - :param name: The name of the timeout attribute to validate. This is used - for clear error messages - :return: the value - :raises ValueError: if the type is not an integer or a float, or if it - is a numeric value less than zero + :param name: The name of the timeout attribute to validate. This is + used to specify in error messages. + :return: The validated and casted version of the given value. + :raises ValueError: If the type is not an integer or a float, or if it + is a numeric value less than zero. """ if value is _Default: return cls.DEFAULT_TIMEOUT @@ -123,7 +122,7 @@ class Timeout(object): raise ValueError("Attempted to set %s timeout to %s, but the " "timeout cannot be set to a value less " "than 0." 
% (name, value)) - except TypeError: # Python 3 + except TypeError: # Python 3 raise ValueError("Timeout value %s was %s, but it must be an " "int or float." % (name, value)) @@ -135,12 +134,12 @@ class Timeout(object): The timeout value used by httplib.py sets the same timeout on the connect(), and recv() socket requests. This creates a :class:`Timeout` - object that sets the individual timeouts to the ``timeout`` value passed - to this function. + object that sets the individual timeouts to the ``timeout`` value + passed to this function. - :param timeout: The legacy timeout value + :param timeout: The legacy timeout value. :type timeout: integer, float, sentinel default object, or None - :return: a Timeout object + :return: Timeout object :rtype: :class:`Timeout` """ return Timeout(read=timeout, connect=timeout) @@ -174,7 +173,7 @@ class Timeout(object): def get_connect_duration(self): """ Gets the time elapsed since the call to :meth:`start_connect`. - :return: the elapsed time + :return: Elapsed time. :rtype: float :raises urllib3.exceptions.TimeoutStateError: if you attempt to get duration for a timer that hasn't been started. @@ -191,7 +190,7 @@ class Timeout(object): This will be a positive float or integer, the value None (never timeout), or the default system timeout. - :return: the connect timeout + :return: Connect timeout. :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None """ if self.total is None: @@ -214,7 +213,7 @@ class Timeout(object): established, a :exc:`~urllib3.exceptions.TimeoutStateError` will be raised. - :return: the value to use for the read timeout + :return: Value to use for the read timeout. :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None :raises urllib3.exceptions.TimeoutStateError: If :meth:`start_connect` has not yet been called on this object. 
@@ -223,7 +222,7 @@ class Timeout(object): self.total is not self.DEFAULT_TIMEOUT and self._read is not None and self._read is not self.DEFAULT_TIMEOUT): - # in case the connect timeout has not yet been established. + # In case the connect timeout has not yet been established. if self._start_connect is None: return self._read return max(0, min(self.total - self.get_connect_duration(), diff --git a/urllib3/util/url.py b/urllib3/util/url.py index 362d216..122108b 100644 --- a/urllib3/util/url.py +++ b/urllib3/util/url.py @@ -2,16 +2,20 @@ from collections import namedtuple from ..exceptions import LocationParseError +url_attrs = ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment'] -class Url(namedtuple('Url', ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment'])): + +class Url(namedtuple('Url', url_attrs)): """ Datastructure for representing an HTTP URL. Used as a return value for :func:`parse_url`. """ slots = () - def __new__(cls, scheme=None, auth=None, host=None, port=None, path=None, query=None, fragment=None): - return super(Url, cls).__new__(cls, scheme, auth, host, port, path, query, fragment) + def __new__(cls, scheme=None, auth=None, host=None, port=None, path=None, + query=None, fragment=None): + return super(Url, cls).__new__(cls, scheme, auth, host, port, path, + query, fragment) @property def hostname(self): -- cgit v1.2.3 From 0f393d00b51bc54c5075447e4a8b21f0bed6acd8 Mon Sep 17 00:00:00 2001 From: SVN-Git Migration Date: Thu, 8 Oct 2015 13:19:39 -0700 Subject: Imported Upstream version 1.9 --- CHANGES.rst | 37 ++++++ LICENSE.txt | 2 +- MANIFEST.in | 3 +- Makefile | 47 +++++++ PKG-INFO | 85 ++++++++++-- README.rst | 46 +++++-- dev-requirements.txt | 5 + dummyserver/handlers.py | 28 +++- dummyserver/server.py | 2 +- dummyserver/testcase.py | 7 + setup.py | 13 +- test-requirements.txt | 4 - test/test_connectionpool.py | 11 +- test/test_poolmanager.py | 5 +- test/test_response.py | 5 + test/test_retry.py | 156 ++++++++++++++++++++++ 
test/test_util.py | 30 ++++- urllib3.egg-info/PKG-INFO | 85 ++++++++++-- urllib3.egg-info/SOURCES.txt | 5 +- urllib3/__init__.py | 26 ++-- urllib3/_collections.py | 6 - urllib3/connection.py | 51 ++++--- urllib3/connectionpool.py | 182 ++++++++++++++++--------- urllib3/contrib/ntlmpool.py | 6 - urllib3/contrib/pyopenssl.py | 202 +++------------------------- urllib3/exceptions.py | 37 ++++-- urllib3/fields.py | 8 +- urllib3/filepost.py | 6 - urllib3/packages/ordered_dict.py | 1 - urllib3/poolmanager.py | 41 +++--- urllib3/request.py | 8 +- urllib3/response.py | 31 +++-- urllib3/util/__init__.py | 9 +- urllib3/util/connection.py | 56 +++++++- urllib3/util/request.py | 13 +- urllib3/util/retry.py | 279 +++++++++++++++++++++++++++++++++++++++ urllib3/util/ssl_.py | 5 +- urllib3/util/timeout.py | 67 +++++----- urllib3/util/url.py | 9 +- 39 files changed, 1151 insertions(+), 468 deletions(-) create mode 100644 Makefile create mode 100644 dev-requirements.txt delete mode 100644 test-requirements.txt create mode 100644 test/test_retry.py create mode 100644 urllib3/util/retry.py diff --git a/CHANGES.rst b/CHANGES.rst index 4d90ce2..9ada9c2 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,43 @@ Changes ======= +1.9 (2014-07-04) +++++++++++++++++ + +* Shuffled around development-related files. If you're maintaining a distro + package of urllib3, you may need to tweak things. (Issue #415) + +* Unverified HTTPS requests will trigger a warning on the first request. See + our new `security documentation + `_ for details. + (Issue #426) + +* New retry logic and ``urllib3.util.retry.Retry`` configuration object. + (Issue #326) + +* All raised exceptions should now wrapped in a + ``urllib3.exceptions.HTTPException``-extending exception. (Issue #326) + +* All errors during a retry-enabled request should be wrapped in + ``urllib3.exceptions.MaxRetryError``, including timeout-related exceptions + which were previously exempt. 
Underlying error is accessible from the + ``.reason`` propery. (Issue #326) + +* ``urllib3.exceptions.ConnectionError`` renamed to + ``urllib3.exceptions.ProtocolError``. (Issue #326) + +* Errors during response read (such as IncompleteRead) are now wrapped in + ``urllib3.exceptions.ProtocolError``. (Issue #418) + +* Requesting an empty host will raise ``urllib3.exceptions.LocationValueError``. + (Issue #417) + +* Catch read timeouts over SSL connections as + ``urllib3.exceptions.ReadTimeoutError``. (Issue #419) + +* Apply socket arguments before connecting. (Issue #427) + + 1.8.3 (2014-06-23) ++++++++++++++++++ diff --git a/LICENSE.txt b/LICENSE.txt index 31f0b6c..2a02593 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,6 +1,6 @@ This is the MIT license: http://www.opensource.org/licenses/mit-license.php -Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +Copyright 2008-2014 Andrey Petrov and contributors (see CONTRIBUTORS.txt) Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software diff --git a/MANIFEST.in b/MANIFEST.in index 3c2189a..6b37d64 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,3 +1,2 @@ -include README.rst CHANGES.rst LICENSE.txt CONTRIBUTORS.txt test-requirements.txt +include README.rst CHANGES.rst LICENSE.txt CONTRIBUTORS.txt dev-requirements.txt Makefile recursive-include dummyserver *.* -prune *.pyc diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..a6cdcfb --- /dev/null +++ b/Makefile @@ -0,0 +1,47 @@ +REQUIREMENTS_FILE=dev-requirements.txt +REQUIREMENTS_OUT=dev-requirements.txt.log +SETUP_OUT=*.egg-info + + +all: setup requirements + +virtualenv: +ifndef VIRTUAL_ENV + $(error Must be run inside of a virtualenv) +endif + +setup: virtualenv $(SETUP_OUT) + +$(SETUP_OUT): setup.py setup.cfg + python setup.py develop + touch $(SETUP_OUT) + +requirements: setup $(REQUIREMENTS_OUT) + +piprot: 
setup + pip install piprot + piprot -x $(REQUIREMENTS_FILE) + +$(REQUIREMENTS_OUT): $(REQUIREMENTS_FILE) + pip install -r $(REQUIREMENTS_FILE) | tee -a $(REQUIREMENTS_OUT) + python setup.py develop + +clean: + find . -name "*.py[oc]" -delete + find . -name "__pycache__" -delete + rm -f $(REQUIREMENTS_OUT) + +test: requirements + nosetests + +test-all: requirements + tox + +docs: + cd docs && pip install -r doc-requirements.txt && make html + +release: + ./release.sh + + +.PHONY: docs diff --git a/PKG-INFO b/PKG-INFO index 8e4fc2f..168944c 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 1.1 Name: urllib3 -Version: 1.8.3 +Version: 1.9 Summary: HTTP library with thread-safe connection pooling, file post, and more. Home-page: http://urllib3.readthedocs.org/ Author: Andrey Petrov @@ -25,16 +25,18 @@ Description: ======= - Supports gzip and deflate decoding. - Thread-safe and sanity-safe. - Works with AppEngine, gevent, and eventlib. - - Tested on Python 2.6+ and Python 3.2+, 100% unit test coverage. + - Tested on Python 2.6+, Python 3.2+, and PyPy, with 100% unit test coverage. - Small and easy to understand codebase perfect for extending and building upon. For a more comprehensive solution, have a look at `Requests `_ which is also powered by ``urllib3``. - + + You might already be using urllib3! =================================== - ``urllib3`` powers `many great Python libraries `_, - including ``pip`` and ``requests``. + ``urllib3`` powers `many great Python libraries + `_, including ``pip`` and + ``requests``. What's wrong with urllib and urllib2? @@ -50,6 +52,7 @@ Description: ======= solving a different scope of problems, and ``urllib3`` follows in a similar vein. + Why do I want to reuse connections? =================================== @@ -71,6 +74,7 @@ Description: ======= retrying is useful. It's relatively lightweight, so it can be used for anything! 
+ Examples ======== @@ -89,26 +93,39 @@ Description: ======= The ``PoolManager`` will take care of reusing connections for you whenever you request the same host. For more fine-grained control of your connection - pools, you should look at - `ConnectionPool `_. + pools, you should look at `ConnectionPool + `_. Run the tests ============= We use some external dependencies, multiple interpreters and code coverage - analysis while running test suite. Easiest way to run the tests is thusly the - ``tox`` utility: :: + analysis while running test suite. Our ``Makefile`` handles much of this for + you as long as you're running it `inside of a virtualenv + `_:: + + $ make test + [... magically installs dependencies and runs tests on your virtualenv] + Ran 182 tests in 1.633s + + OK (SKIP=6) - $ tox - # [..] + Note that code coverage less than 100% is regarded as a failing run. Some + platform-specific tests are skipped unless run in that platform. To make sure + the code works in all of urllib3's supported platforms, you can run our ``tox`` + suite:: + + $ make test-all + [... tox creates a virtualenv for every platform and runs tests inside of each] py26: commands succeeded py27: commands succeeded py32: commands succeeded py33: commands succeeded py34: commands succeeded - Note that code coverage less than 100% is regarded as a failing run. + Our test suite `runs continuously on Travis CI + `_ with every pull request. Contributing @@ -126,9 +143,53 @@ Description: ======= :) Make sure to add yourself to ``CONTRIBUTORS.txt``. + Sponsorship + =========== + + If your company benefits from this library, please consider `sponsoring its + development `_. + + Changes ======= + 1.9 (2014-07-04) + ++++++++++++++++ + + * Shuffled around development-related files. If you're maintaining a distro + package of urllib3, you may need to tweak things. (Issue #415) + + * Unverified HTTPS requests will trigger a warning on the first request. 
See + our new `security documentation + `_ for details. + (Issue #426) + + * New retry logic and ``urllib3.util.retry.Retry`` configuration object. + (Issue #326) + + * All raised exceptions should now wrapped in a + ``urllib3.exceptions.HTTPException``-extending exception. (Issue #326) + + * All errors during a retry-enabled request should be wrapped in + ``urllib3.exceptions.MaxRetryError``, including timeout-related exceptions + which were previously exempt. Underlying error is accessible from the + ``.reason`` propery. (Issue #326) + + * ``urllib3.exceptions.ConnectionError`` renamed to + ``urllib3.exceptions.ProtocolError``. (Issue #326) + + * Errors during response read (such as IncompleteRead) are now wrapped in + ``urllib3.exceptions.ProtocolError``. (Issue #418) + + * Requesting an empty host will raise ``urllib3.exceptions.LocationValueError``. + (Issue #417) + + * Catch read timeouts over SSL connections as + ``urllib3.exceptions.ReadTimeoutError``. (Issue #419) + + * Apply socket arguments before connecting. (Issue #427) + + 1.8.3 (2014-06-23) ++++++++++++++++++ diff --git a/README.rst b/README.rst index e76c261..6a81759 100644 --- a/README.rst +++ b/README.rst @@ -17,16 +17,18 @@ Highlights - Supports gzip and deflate decoding. - Thread-safe and sanity-safe. - Works with AppEngine, gevent, and eventlib. -- Tested on Python 2.6+ and Python 3.2+, 100% unit test coverage. +- Tested on Python 2.6+, Python 3.2+, and PyPy, with 100% unit test coverage. - Small and easy to understand codebase perfect for extending and building upon. For a more comprehensive solution, have a look at `Requests `_ which is also powered by ``urllib3``. - + + You might already be using urllib3! =================================== -``urllib3`` powers `many great Python libraries `_, -including ``pip`` and ``requests``. +``urllib3`` powers `many great Python libraries +`_, including ``pip`` and +``requests``. What's wrong with urllib and urllib2? @@ -42,6 +44,7 @@ with each other. 
They were designed to be independent and standalone, each solving a different scope of problems, and ``urllib3`` follows in a similar vein. + Why do I want to reuse connections? =================================== @@ -63,6 +66,7 @@ This library is perfect for: retrying is useful. It's relatively lightweight, so it can be used for anything! + Examples ======== @@ -81,26 +85,39 @@ But, long story short:: The ``PoolManager`` will take care of reusing connections for you whenever you request the same host. For more fine-grained control of your connection -pools, you should look at -`ConnectionPool `_. +pools, you should look at `ConnectionPool +`_. Run the tests ============= We use some external dependencies, multiple interpreters and code coverage -analysis while running test suite. Easiest way to run the tests is thusly the -``tox`` utility: :: +analysis while running test suite. Our ``Makefile`` handles much of this for +you as long as you're running it `inside of a virtualenv +`_:: + + $ make test + [... magically installs dependencies and runs tests on your virtualenv] + Ran 182 tests in 1.633s - $ tox - # [..] + OK (SKIP=6) + +Note that code coverage less than 100% is regarded as a failing run. Some +platform-specific tests are skipped unless run in that platform. To make sure +the code works in all of urllib3's supported platforms, you can run our ``tox`` +suite:: + + $ make test-all + [... tox creates a virtualenv for every platform and runs tests inside of each] py26: commands succeeded py27: commands succeeded py32: commands succeeded py33: commands succeeded py34: commands succeeded -Note that code coverage less than 100% is regarded as a failing run. +Our test suite `runs continuously on Travis CI +`_ with every pull request. Contributing @@ -116,3 +133,10 @@ Contributing as expected. #. Send a pull request and bug the maintainer until it gets merged and published. :) Make sure to add yourself to ``CONTRIBUTORS.txt``. 
+ + +Sponsorship +=========== + +If your company benefits from this library, please consider `sponsoring its +development `_. diff --git a/dev-requirements.txt b/dev-requirements.txt new file mode 100644 index 0000000..6de0e09 --- /dev/null +++ b/dev-requirements.txt @@ -0,0 +1,5 @@ +nose==1.3.3 +mock==1.0.1 +tornado==3.2.2 +coverage==3.7.1 +tox==1.7.1 diff --git a/dummyserver/handlers.py b/dummyserver/handlers.py index 5d6e2e6..72faa1a 100644 --- a/dummyserver/handlers.py +++ b/dummyserver/handlers.py @@ -1,5 +1,6 @@ from __future__ import print_function +import collections import gzip import json import logging @@ -41,11 +42,12 @@ class TestingApp(WSGIHandler): Simple app that performs various operations, useful for testing an HTTP library. - Given any path, it will attempt to convert it will load a corresponding - local method if it exists. Status code 200 indicates success, 400 indicates - failure. Each method has its own conditions for success/failure. + Given any path, it will attempt to load a corresponding local method if + it exists. Status code 200 indicates success, 400 indicates failure. Each + method has its own conditions for success/failure. """ def __call__(self, environ, start_response): + """ Call the correct method in this class based on the incoming URI """ req = HTTPRequest(environ) req.params = {} @@ -172,6 +174,25 @@ class TestingApp(WSGIHandler): def headers(self, request): return Response(json.dumps(request.headers)) + def successful_retry(self, request): + """ Handler which will return an error and then success + + It's not currently very flexible as the number of retries is hard-coded. 
+ """ + test_name = request.headers.get('test-name', None) + if not test_name: + return Response("test-name header not set", + status="400 Bad Request") + + if not hasattr(self, 'retry_test_names'): + self.retry_test_names = collections.defaultdict(int) + self.retry_test_names[test_name] += 1 + + if self.retry_test_names[test_name] >= 2: + return Response("Retry successful!") + else: + return Response("need to keep retrying!", status="418 I'm A Teapot") + def shutdown(self, request): sys.exit() @@ -207,7 +228,6 @@ def _parse_header(line): params.pop(0) # get rid of the dummy again pdict = {} for name, value in params: - print(repr(value)) value = email.utils.collapse_rfc2231_value(value) if len(value) >= 2 and value[0] == '"' and value[-1] == '"': value = value[1:-1] diff --git a/dummyserver/server.py b/dummyserver/server.py index 22de456..99f0835 100755 --- a/dummyserver/server.py +++ b/dummyserver/server.py @@ -58,7 +58,7 @@ class SocketServerThread(threading.Thread): self.port = sock.getsockname()[1] # Once listen() returns, the server socket is ready - sock.listen(1) + sock.listen(0) if self.ready_event: self.ready_event.set() diff --git a/dummyserver/testcase.py b/dummyserver/testcase.py index 35769ef..335b2f2 100644 --- a/dummyserver/testcase.py +++ b/dummyserver/testcase.py @@ -40,6 +40,13 @@ class SocketDummyServerTestCase(unittest.TestCase): class HTTPDummyServerTestCase(unittest.TestCase): + """ A simple HTTP server that runs when your test class runs + + Have your unittest class inherit from this one, and then a simple server + will start when your tests run, and automatically shut down when they + complete. For examples of what test requests you can send to the server, + see the TestingApp in dummyserver/handlers.py. 
+ """ scheme = 'http' host = 'localhost' host_alt = '127.0.0.1' # Some tests need two hosts diff --git a/setup.py b/setup.py index 92fad33..f638377 100644 --- a/setup.py +++ b/setup.py @@ -21,9 +21,6 @@ fp.close() version = VERSION -requirements = [] -tests_requirements = requirements + open('test-requirements.txt').readlines() - setup(name='urllib3', version=version, description="HTTP library with thread-safe connection pooling, file post, and more.", @@ -48,7 +45,13 @@ setup(name='urllib3', 'urllib3.packages', 'urllib3.packages.ssl_match_hostname', 'urllib3.contrib', 'urllib3.util', ], - requires=requirements, - tests_require=tests_requirements, + requires=[], + tests_require=[ + # These are a less-specific subset of dev-requirements.txt, for the + # convenience of distro package maintainers. + 'nose', + 'mock', + 'tornado', + ], test_suite='test', ) diff --git a/test-requirements.txt b/test-requirements.txt deleted file mode 100644 index 02d70f4..0000000 --- a/test-requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -nose==1.3 -mock==1.0.1 -tornado==3.1.1 -coverage==3.6 diff --git a/test/test_connectionpool.py b/test/test_connectionpool.py index 02229cf..28fb89b 100644 --- a/test/test_connectionpool.py +++ b/test/test_connectionpool.py @@ -5,13 +5,15 @@ from urllib3.connectionpool import ( HTTPConnection, HTTPConnectionPool, ) -from urllib3.util import Timeout +from urllib3.util.timeout import Timeout from urllib3.packages.ssl_match_hostname import CertificateError from urllib3.exceptions import ( ClosedPoolError, EmptyPoolError, HostChangedError, + LocationValueError, MaxRetryError, + ProtocolError, SSLError, ) @@ -127,7 +129,7 @@ class TestConnectionPool(unittest.TestCase): HTTPConnectionPool(host='localhost'), "Test.", err)), "HTTPConnectionPool(host='localhost', port=None): " "Max retries exceeded with url: Test. 
" - "(Caused by {0}: Test)".format(str(err.__class__))) + "(Caused by %r)" % err) def test_pool_size(self): @@ -186,7 +188,6 @@ class TestConnectionPool(unittest.TestCase): self.assertRaises(Empty, old_pool_queue.get, block=False) - def test_pool_timeouts(self): pool = HTTPConnectionPool(host='localhost') conn = pool._new_conn() @@ -201,6 +202,10 @@ class TestConnectionPool(unittest.TestCase): self.assertEqual(pool.timeout._connect, 3) self.assertEqual(pool.timeout.total, None) + def test_no_host(self): + self.assertRaises(LocationValueError, HTTPConnectionPool, None) + + if __name__ == '__main__': unittest.main() diff --git a/test/test_poolmanager.py b/test/test_poolmanager.py index 759b5e3..754ee8a 100644 --- a/test/test_poolmanager.py +++ b/test/test_poolmanager.py @@ -4,7 +4,7 @@ from urllib3.poolmanager import PoolManager from urllib3 import connection_from_url from urllib3.exceptions import ( ClosedPoolError, - LocationParseError, + LocationValueError, ) @@ -68,7 +68,8 @@ class TestPoolManager(unittest.TestCase): def test_nohost(self): p = PoolManager(5) - self.assertRaises(LocationParseError, p.connection_from_url, 'http://@') + self.assertRaises(LocationValueError, p.connection_from_url, 'http://@') + self.assertRaises(LocationValueError, p.connection_from_url, None) if __name__ == '__main__': diff --git a/test/test_response.py b/test/test_response.py index ecfcbee..ad134ee 100644 --- a/test/test_response.py +++ b/test/test_response.py @@ -131,6 +131,11 @@ class TestResponse(unittest.TestCase): self.assertEqual(r.read(1), b'f') self.assertEqual(r.read(2), b'oo') + def test_body_blob(self): + resp = HTTPResponse(b'foo') + self.assertEqual(resp.data, b'foo') + self.assertTrue(resp.closed) + def test_io(self): import socket try: diff --git a/test/test_retry.py b/test/test_retry.py new file mode 100644 index 0000000..7a3aa40 --- /dev/null +++ b/test/test_retry.py @@ -0,0 +1,156 @@ +import unittest + +from urllib3.packages.six.moves import xrange +from 
urllib3.util.retry import Retry +from urllib3.exceptions import ( + ConnectTimeoutError, + ReadTimeoutError, + MaxRetryError +) + + +class RetryTest(unittest.TestCase): + + def test_string(self): + """ Retry string representation looks the way we expect """ + retry = Retry() + self.assertEqual(str(retry), 'Retry(total=10, connect=None, read=None, redirect=None)') + for _ in range(3): + retry = retry.increment() + self.assertEqual(str(retry), 'Retry(total=7, connect=None, read=None, redirect=None)') + + def test_retry_both_specified(self): + """Total can win if it's lower than the connect value""" + error = ConnectTimeoutError() + retry = Retry(connect=3, total=2) + retry = retry.increment(error=error) + retry = retry.increment(error=error) + try: + retry.increment(error=error) + self.fail("Failed to raise error.") + except MaxRetryError as e: + self.assertEqual(e.reason, error) + + def test_retry_higher_total_loses(self): + """ A lower connect timeout than the total is honored """ + error = ConnectTimeoutError() + retry = Retry(connect=2, total=3) + retry = retry.increment(error=error) + retry = retry.increment(error=error) + self.assertRaises(MaxRetryError, retry.increment, error=error) + + def test_retry_higher_total_loses_vs_read(self): + """ A lower read timeout than the total is honored """ + error = ReadTimeoutError(None, "/", "read timed out") + retry = Retry(read=2, total=3) + retry = retry.increment(error=error) + retry = retry.increment(error=error) + self.assertRaises(MaxRetryError, retry.increment, error=error) + + def test_retry_total_none(self): + """ if Total is none, connect error should take precedence """ + error = ConnectTimeoutError() + retry = Retry(connect=2, total=None) + retry = retry.increment(error=error) + retry = retry.increment(error=error) + try: + retry.increment(error=error) + self.fail("Failed to raise error.") + except MaxRetryError as e: + self.assertEqual(e.reason, error) + + error = ReadTimeoutError(None, "/", "read timed out") 
+ retry = Retry(connect=2, total=None) + retry = retry.increment(error=error) + retry = retry.increment(error=error) + retry = retry.increment(error=error) + self.assertFalse(retry.is_exhausted()) + + def test_retry_default(self): + """ If no value is specified, should retry connects 3 times """ + retry = Retry() + self.assertEqual(retry.total, 10) + self.assertEqual(retry.connect, None) + self.assertEqual(retry.read, None) + self.assertEqual(retry.redirect, None) + + error = ConnectTimeoutError() + retry = Retry(connect=1) + retry = retry.increment(error=error) + self.assertRaises(MaxRetryError, retry.increment, error=error) + + retry = Retry(connect=1) + retry = retry.increment(error=error) + self.assertFalse(retry.is_exhausted()) + + self.assertTrue(Retry(0).raise_on_redirect) + self.assertFalse(Retry(False).raise_on_redirect) + + def test_retry_read_zero(self): + """ No second chances on read timeouts, by default """ + error = ReadTimeoutError(None, "/", "read timed out") + retry = Retry(read=0) + try: + retry.increment(error=error) + self.fail("Failed to raise error.") + except MaxRetryError as e: + self.assertEqual(e.reason, error) + + def test_backoff(self): + """ Backoff is computed correctly """ + max_backoff = Retry.BACKOFF_MAX + + retry = Retry(total=100, backoff_factor=0.2) + self.assertEqual(retry.get_backoff_time(), 0) # First request + + retry = retry.increment() + self.assertEqual(retry.get_backoff_time(), 0) # First retry + + retry = retry.increment() + self.assertEqual(retry.backoff_factor, 0.2) + self.assertEqual(retry.total, 98) + self.assertEqual(retry.get_backoff_time(), 0.4) # Start backoff + + retry = retry.increment() + self.assertEqual(retry.get_backoff_time(), 0.8) + + retry = retry.increment() + self.assertEqual(retry.get_backoff_time(), 1.6) + + for i in xrange(10): + retry = retry.increment() + + self.assertEqual(retry.get_backoff_time(), max_backoff) + + def test_zero_backoff(self): + retry = Retry() + 
self.assertEqual(retry.get_backoff_time(), 0) + retry = retry.increment() + retry = retry.increment() + self.assertEqual(retry.get_backoff_time(), 0) + + def test_sleep(self): + # sleep a very small amount of time so our code coverage is happy + retry = Retry(backoff_factor=0.0001) + retry = retry.increment() + retry = retry.increment() + retry.sleep() + + def test_status_forcelist(self): + retry = Retry(status_forcelist=xrange(500,600)) + self.assertFalse(retry.is_forced_retry('GET', status_code=200)) + self.assertFalse(retry.is_forced_retry('GET', status_code=400)) + self.assertTrue(retry.is_forced_retry('GET', status_code=500)) + + retry = Retry(total=1, status_forcelist=[418]) + self.assertFalse(retry.is_forced_retry('GET', status_code=400)) + self.assertTrue(retry.is_forced_retry('GET', status_code=418)) + + def test_exhausted(self): + self.assertFalse(Retry(0).is_exhausted()) + self.assertTrue(Retry(-1).is_exhausted()) + self.assertEqual(Retry(1).increment().total, 0) + + def test_disabled(self): + self.assertRaises(MaxRetryError, Retry(-1).increment) + self.assertRaises(MaxRetryError, Retry(0).increment) diff --git a/test/test_util.py b/test/test_util.py index 944d90f..388d877 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -1,20 +1,27 @@ +import warnings import logging import unittest import ssl from mock import patch -from urllib3 import add_stderr_logger -from urllib3.util import ( +from urllib3 import add_stderr_logger, disable_warnings +from urllib3.util.request import make_headers +from urllib3.util.timeout import Timeout +from urllib3.util.url import ( get_host, - make_headers, - split_first, parse_url, - Timeout, + split_first, Url, - resolve_cert_reqs, ) -from urllib3.exceptions import LocationParseError, TimeoutStateError +from urllib3.util.ssl_ import resolve_cert_reqs +from urllib3.exceptions import ( + LocationParseError, + TimeoutStateError, + InsecureRequestWarning, +) + +from . 
import clear_warnings # This number represents a time in seconds, it doesn't mean anything in # isolation. Setting to a high-ish value to avoid conflicts with the smaller @@ -203,6 +210,15 @@ class TestUtil(unittest.TestCase): logger.debug('Testing add_stderr_logger') logger.removeHandler(handler) + def test_disable_warnings(self): + with warnings.catch_warnings(record=True) as w: + clear_warnings() + warnings.warn('This is a test.', InsecureRequestWarning) + self.assertEqual(len(w), 1) + disable_warnings() + warnings.warn('This is a test.', InsecureRequestWarning) + self.assertEqual(len(w), 1) + def _make_time_pass(self, seconds, timeout, time_mock): """ Make some time pass for the timeout object """ time_mock.return_value = TIMEOUT_EPOCH diff --git a/urllib3.egg-info/PKG-INFO b/urllib3.egg-info/PKG-INFO index 8e4fc2f..168944c 100644 --- a/urllib3.egg-info/PKG-INFO +++ b/urllib3.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 1.1 Name: urllib3 -Version: 1.8.3 +Version: 1.9 Summary: HTTP library with thread-safe connection pooling, file post, and more. Home-page: http://urllib3.readthedocs.org/ Author: Andrey Petrov @@ -25,16 +25,18 @@ Description: ======= - Supports gzip and deflate decoding. - Thread-safe and sanity-safe. - Works with AppEngine, gevent, and eventlib. - - Tested on Python 2.6+ and Python 3.2+, 100% unit test coverage. + - Tested on Python 2.6+, Python 3.2+, and PyPy, with 100% unit test coverage. - Small and easy to understand codebase perfect for extending and building upon. For a more comprehensive solution, have a look at `Requests `_ which is also powered by ``urllib3``. - + + You might already be using urllib3! =================================== - ``urllib3`` powers `many great Python libraries `_, - including ``pip`` and ``requests``. + ``urllib3`` powers `many great Python libraries + `_, including ``pip`` and + ``requests``. What's wrong with urllib and urllib2? 
@@ -50,6 +52,7 @@ Description: ======= solving a different scope of problems, and ``urllib3`` follows in a similar vein. + Why do I want to reuse connections? =================================== @@ -71,6 +74,7 @@ Description: ======= retrying is useful. It's relatively lightweight, so it can be used for anything! + Examples ======== @@ -89,26 +93,39 @@ Description: ======= The ``PoolManager`` will take care of reusing connections for you whenever you request the same host. For more fine-grained control of your connection - pools, you should look at - `ConnectionPool `_. + pools, you should look at `ConnectionPool + `_. Run the tests ============= We use some external dependencies, multiple interpreters and code coverage - analysis while running test suite. Easiest way to run the tests is thusly the - ``tox`` utility: :: + analysis while running test suite. Our ``Makefile`` handles much of this for + you as long as you're running it `inside of a virtualenv + `_:: + + $ make test + [... magically installs dependencies and runs tests on your virtualenv] + Ran 182 tests in 1.633s + + OK (SKIP=6) - $ tox - # [..] + Note that code coverage less than 100% is regarded as a failing run. Some + platform-specific tests are skipped unless run in that platform. To make sure + the code works in all of urllib3's supported platforms, you can run our ``tox`` + suite:: + + $ make test-all + [... tox creates a virtualenv for every platform and runs tests inside of each] py26: commands succeeded py27: commands succeeded py32: commands succeeded py33: commands succeeded py34: commands succeeded - Note that code coverage less than 100% is regarded as a failing run. + Our test suite `runs continuously on Travis CI + `_ with every pull request. Contributing @@ -126,9 +143,53 @@ Description: ======= :) Make sure to add yourself to ``CONTRIBUTORS.txt``. + Sponsorship + =========== + + If your company benefits from this library, please consider `sponsoring its + development `_. 
+ + Changes ======= + 1.9 (2014-07-04) + ++++++++++++++++ + + * Shuffled around development-related files. If you're maintaining a distro + package of urllib3, you may need to tweak things. (Issue #415) + + * Unverified HTTPS requests will trigger a warning on the first request. See + our new `security documentation + `_ for details. + (Issue #426) + + * New retry logic and ``urllib3.util.retry.Retry`` configuration object. + (Issue #326) + + * All raised exceptions should now wrapped in a + ``urllib3.exceptions.HTTPException``-extending exception. (Issue #326) + + * All errors during a retry-enabled request should be wrapped in + ``urllib3.exceptions.MaxRetryError``, including timeout-related exceptions + which were previously exempt. Underlying error is accessible from the + ``.reason`` propery. (Issue #326) + + * ``urllib3.exceptions.ConnectionError`` renamed to + ``urllib3.exceptions.ProtocolError``. (Issue #326) + + * Errors during response read (such as IncompleteRead) are now wrapped in + ``urllib3.exceptions.ProtocolError``. (Issue #418) + + * Requesting an empty host will raise ``urllib3.exceptions.LocationValueError``. + (Issue #417) + + * Catch read timeouts over SSL connections as + ``urllib3.exceptions.ReadTimeoutError``. (Issue #419) + + * Apply socket arguments before connecting. 
(Issue #427) + + 1.8.3 (2014-06-23) ++++++++++++++++++ diff --git a/urllib3.egg-info/SOURCES.txt b/urllib3.egg-info/SOURCES.txt index fb93e5b..e0b9ddd 100644 --- a/urllib3.egg-info/SOURCES.txt +++ b/urllib3.egg-info/SOURCES.txt @@ -2,10 +2,11 @@ CHANGES.rst CONTRIBUTORS.txt LICENSE.txt MANIFEST.in +Makefile README.rst +dev-requirements.txt setup.cfg setup.py -test-requirements.txt dummyserver/__init__.py dummyserver/handlers.py dummyserver/proxy.py @@ -30,6 +31,7 @@ test/test_filepost.py test/test_poolmanager.py test/test_proxymanager.py test/test_response.py +test/test_retry.py test/test_util.py urllib3/__init__.py urllib3/_collections.py @@ -57,6 +59,7 @@ urllib3/util/__init__.py urllib3/util/connection.py urllib3/util/request.py urllib3/util/response.py +urllib3/util/retry.py urllib3/util/ssl_.py urllib3/util/timeout.py urllib3/util/url.py \ No newline at end of file diff --git a/urllib3/__init__.py b/urllib3/__init__.py index c80d5da..56f5bf4 100644 --- a/urllib3/__init__.py +++ b/urllib3/__init__.py @@ -1,16 +1,10 @@ -# urllib3/__init__.py -# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) -# -# This module is part of urllib3 and is released under -# the MIT License: http://www.opensource.org/licenses/mit-license.php - """ urllib3 - Thread-safe connection pooling and re-using. """ __author__ = 'Andrey Petrov (andrey.petrov@shazow.net)' __license__ = 'MIT' -__version__ = '1.8.3' +__version__ = '1.9' from .connectionpool import ( @@ -23,7 +17,10 @@ from . import exceptions from .filepost import encode_multipart_formdata from .poolmanager import PoolManager, ProxyManager, proxy_from_url from .response import HTTPResponse -from .util import make_headers, get_host, Timeout +from .util.request import make_headers +from .util.url import get_host +from .util.timeout import Timeout +from .util.retry import Retry # Set default logging handler to avoid "No handler found" warnings. 
@@ -51,8 +48,19 @@ def add_stderr_logger(level=logging.DEBUG): handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s')) logger.addHandler(handler) logger.setLevel(level) - logger.debug('Added an stderr logging handler to logger: %s' % __name__) + logger.debug('Added a stderr logging handler to logger: %s' % __name__) return handler # ... Clean up. del NullHandler + + +# Set security warning to only go off once by default. +import warnings +warnings.simplefilter('module', exceptions.InsecureRequestWarning) + +def disable_warnings(category=exceptions.HTTPWarning): + """ + Helper for quickly disabling all urllib3 warnings. + """ + warnings.simplefilter('ignore', category) diff --git a/urllib3/_collections.py b/urllib3/_collections.py index ccf0d5f..d77ebb8 100644 --- a/urllib3/_collections.py +++ b/urllib3/_collections.py @@ -1,9 +1,3 @@ -# urllib3/_collections.py -# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) -# -# This module is part of urllib3 and is released under -# the MIT License: http://www.opensource.org/licenses/mit-license.php - from collections import Mapping, MutableMapping try: from threading import RLock diff --git a/urllib3/connection.py b/urllib3/connection.py index fbb63ed..0d578d7 100644 --- a/urllib3/connection.py +++ b/urllib3/connection.py @@ -1,9 +1,3 @@ -# urllib3/connection.py -# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) -# -# This module is part of urllib3 and is released under -# the MIT License: http://www.opensource.org/licenses/mit-license.php - import sys import socket from socket import timeout as SocketTimeout @@ -35,13 +29,16 @@ from .exceptions import ( ) from .packages.ssl_match_hostname import match_hostname from .packages import six -from .util import ( - assert_fingerprint, + +from .util.ssl_ import ( resolve_cert_reqs, resolve_ssl_version, ssl_wrap_socket, + assert_fingerprint, ) +from .util import connection + port_by_scheme = { 'http': 80, @@ 
-82,15 +79,23 @@ class HTTPConnection(_HTTPConnection, object): #: ``[(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)]`` default_socket_options = [(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)] + #: Whether this connection verifies the host's certificate. + is_verified = False + def __init__(self, *args, **kw): if six.PY3: # Python 3 kw.pop('strict', None) - if sys.version_info < (2, 7): # Python 2.6 and older - kw.pop('source_address', None) # Pre-set source_address in case we have an older Python like 2.6. self.source_address = kw.get('source_address') + if sys.version_info < (2, 7): # Python 2.6 + # _HTTPConnection on Python 2.6 will balk at this keyword arg, but + # not newer versions. We can still use it when creating a + # connection though, so we pop it *after* we have saved it as + # self.source_address. + kw.pop('source_address', None) + #: The socket options provided by the user. If no options are #: provided, we use the default options. self.socket_options = kw.pop('socket_options', self.default_socket_options) @@ -103,22 +108,22 @@ class HTTPConnection(_HTTPConnection, object): :return: New socket connection. """ - extra_args = [] - if self.source_address: # Python 2.7+ - extra_args.append(self.source_address) + extra_kw = {} + if self.source_address: + extra_kw['source_address'] = self.source_address + + if self.socket_options: + extra_kw['socket_options'] = self.socket_options try: - conn = socket.create_connection( - (self.host, self.port), self.timeout, *extra_args) + conn = connection.create_connection( + (self.host, self.port), self.timeout, **extra_kw) except SocketTimeout: raise ConnectTimeoutError( self, "Connection to %s timed out. (connect timeout=%s)" % (self.host, self.timeout)) - # Set options on the socket. 
- self._set_options_on(conn) - return conn def _prepare_conn(self, conn): @@ -132,14 +137,6 @@ class HTTPConnection(_HTTPConnection, object): # Mark this connection as not reusable self.auto_open = 0 - def _set_options_on(self, conn): - # Disable all socket options if the user passes ``socket_options=None`` - if self.socket_options is None: - return - - for opt in self.socket_options: - conn.setsockopt(*opt) - def connect(self): conn = self._new_conn() self._prepare_conn(conn) @@ -225,6 +222,8 @@ class VerifiedHTTPSConnection(HTTPSConnection): match_hostname(self.sock.getpeercert(), self.assert_hostname or hostname) + self.is_verified = resolved_cert_reqs == ssl.CERT_REQUIRED + if ssl: # Make a copy for testing. diff --git a/urllib3/connectionpool.py b/urllib3/connectionpool.py index ab205fa..9317fdc 100644 --- a/urllib3/connectionpool.py +++ b/urllib3/connectionpool.py @@ -1,12 +1,7 @@ -# urllib3/connectionpool.py -# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) -# -# This module is part of urllib3 and is released under -# the MIT License: http://www.opensource.org/licenses/mit-license.php - -import sys import errno import logging +import sys +import warnings from socket import error as SocketError, timeout as SocketTimeout import socket @@ -20,15 +15,16 @@ except ImportError: from .exceptions import ( ClosedPoolError, - ConnectionError, + ProtocolError, EmptyPoolError, HostChangedError, - LocationParseError, + LocationValueError, MaxRetryError, + ProxyError, + ReadTimeoutError, SSLError, TimeoutError, - ReadTimeoutError, - ProxyError, + InsecureRequestWarning, ) from .packages.ssl_match_hostname import CertificateError from .packages import six @@ -40,11 +36,11 @@ from .connection import ( ) from .request import RequestMethods from .response import HTTPResponse -from .util import ( - get_host, - is_connection_dropped, - Timeout, -) + +from .util.connection import is_connection_dropped +from .util.retry import Retry +from .util.timeout 
import Timeout +from .util.url import get_host xrange = six.moves.xrange @@ -65,13 +61,11 @@ class ConnectionPool(object): QueueCls = LifoQueue def __init__(self, host, port=None): - if host is None: - raise LocationParseError(host) + if not host: + raise LocationValueError("No host specified.") # httplib doesn't like it when we include brackets in ipv6 addresses - host = host.strip('[]') - - self.host = host + self.host = host.strip('[]') self.port = port def __str__(self): @@ -126,6 +120,9 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): Headers to include with all requests, unless other headers are given explicitly. + :param retries: + Retry configuration to use by default with requests in this pool. + :param _proxy: Parsed proxy URL, should not be used directly, instead, see :class:`urllib3.connectionpool.ProxyManager`" @@ -144,18 +141,22 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): def __init__(self, host, port=None, strict=False, timeout=Timeout.DEFAULT_TIMEOUT, maxsize=1, block=False, - headers=None, _proxy=None, _proxy_headers=None, **conn_kw): + headers=None, retries=None, + _proxy=None, _proxy_headers=None, + **conn_kw): ConnectionPool.__init__(self, host, port) RequestMethods.__init__(self, headers) self.strict = strict - # This is for backwards compatibility and can be removed once a timeout - # can only be set to a Timeout object if not isinstance(timeout, Timeout): timeout = Timeout.from_float(timeout) + if retries is None: + retries = Retry.DEFAULT + self.timeout = timeout + self.retries = retries self.pool = self.QueueCls(maxsize) self.block = block @@ -259,6 +260,12 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): if conn: conn.close() + def _validate_conn(self, conn): + """ + Called right before a request is made, after the socket is created. 
+ """ + pass + def _get_timeout(self, timeout): """ Helper that always returns a :class:`urllib3.util.Timeout` """ if timeout is _Default: @@ -290,9 +297,12 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): self.num_requests += 1 timeout_obj = self._get_timeout(timeout) - timeout_obj.start_connect() conn.timeout = timeout_obj.connect_timeout + + # Trigger any extra validation we need to do. + self._validate_conn(conn) + # conn.request() calls httplib.*.request, not the method in # urllib3.request. It also calls makefile (recv) on the socket. conn.request(method, url, **httplib_request_kw) @@ -301,7 +311,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): read_timeout = timeout_obj.read_timeout # App Engine doesn't have a sock attr - if hasattr(conn, 'sock'): + if getattr(conn, 'sock', None): # In Python 3 socket.py will catch EAGAIN and return None when you # try and read into the file pointer created by http.client, which # instead raises a BadStatusLine exception. Instead of catching @@ -309,8 +319,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): # timeouts, check for a zero timeout before making the request. if read_timeout == 0: raise ReadTimeoutError( - self, url, - "Read timed out. (read timeout=%s)" % read_timeout) + self, url, "Read timed out. (read timeout=%s)" % read_timeout) if read_timeout is Timeout.DEFAULT_TIMEOUT: conn.sock.settimeout(socket.getdefaulttimeout()) else: # None or a value @@ -332,7 +341,8 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): # http://bugs.python.org/issue10272 if 'timed out' in str(e) or \ 'did not complete (read)' in str(e): # Python 2.6 - raise ReadTimeoutError(self, url, "Read timed out.") + raise ReadTimeoutError( + self, url, "Read timed out. 
(read timeout=%s)" % read_timeout) raise @@ -341,8 +351,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): # have to specifically catch it and throw the timeout error if e.errno in _blocking_errnos: raise ReadTimeoutError( - self, url, - "Read timed out. (read timeout=%s)" % read_timeout) + self, url, "Read timed out. (read timeout=%s)" % read_timeout) raise @@ -388,7 +397,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): return (scheme, host, port) == (self.scheme, self.host, self.port) - def urlopen(self, method, url, body=None, headers=None, retries=3, + def urlopen(self, method, url, body=None, headers=None, retries=None, redirect=True, assert_same_host=True, timeout=_Default, pool_timeout=None, release_conn=None, **response_kw): """ @@ -422,9 +431,20 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): these headers completely replace any pool-specific headers. :param retries: - Number of retries to allow before raising a MaxRetryError exception. - If `False`, then retries are disabled and any exception is raised - immediately. + Configure the number of retries to allow before raising a + :class:`~urllib3.exceptions.MaxRetryError` exception. + + Pass ``None`` to retry until you receive a response. Pass a + :class:`~urllib3.util.retry.Retry` object for fine-grained control + over different types of retries. + Pass an integer number to retry connection errors that many times, + but no other types of errors. Pass zero to never retry. + + If ``False``, then retries are disabled and any exception is raised + immediately. Also, instead of raising a MaxRetryError on redirects, + the redirect response will be returned. + + :type retries: :class:`~urllib3.util.retry.Retry`, False, or an int. 
:param redirect: If True, automatically handle redirects (status codes 301, 302, @@ -463,15 +483,15 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): if headers is None: headers = self.headers - if retries < 0 and retries is not False: - raise MaxRetryError(self, url) + if not isinstance(retries, Retry): + retries = Retry.from_int(retries, redirect=redirect, default=self.retries) if release_conn is None: release_conn = response_kw.get('preload_content', True) # Check host if assert_same_host and not self.is_same_host(url): - raise HostChangedError(self, url, retries - 1) + raise HostChangedError(self, url, retries) conn = None @@ -487,10 +507,10 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): err = None try: - # Request a connection from the queue + # Request a connection from the queue. conn = self._get_conn(timeout=pool_timeout) - # Make the request on the httplib connection object + # Make the request on the httplib connection object. httplib_response = self._make_request(conn, method, url, timeout=timeout, body=body, headers=headers) @@ -529,21 +549,15 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): conn.close() conn = None - if not retries: - if isinstance(e, TimeoutError): - # TimeoutError is exempt from MaxRetryError-wrapping. - # FIXME: ... Not sure why. Add a reason here. - raise - - # Wrap unexpected exceptions with the most appropriate - # module-level exception and re-raise. 
- if isinstance(e, SocketError) and self.proxy: - raise ProxyError('Cannot connect to proxy.', e) - - if retries is False: - raise ConnectionError('Connection failed.', e) + stacktrace = sys.exc_info()[2] + if isinstance(e, SocketError) and self.proxy: + e = ProxyError('Cannot connect to proxy.', e) + elif isinstance(e, (SocketError, HTTPException)): + e = ProtocolError('Connection aborted.', e) - raise MaxRetryError(self, url, e) + retries = retries.increment(method, url, error=e, + _pool=self, _stacktrace=stacktrace) + retries.sleep() # Keep track of the error for the retry warning. err = e @@ -557,23 +571,43 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): if not conn: # Try again - log.warning("Retrying (%d attempts remain) after connection " + log.warning("Retrying (%r) after connection " "broken by '%r': %s" % (retries, err, url)) - return self.urlopen(method, url, body, headers, retries - 1, + return self.urlopen(method, url, body, headers, retries, redirect, assert_same_host, timeout=timeout, pool_timeout=pool_timeout, release_conn=release_conn, **response_kw) # Handle redirect? redirect_location = redirect and response.get_redirect_location() - if redirect_location and retries is not False: + if redirect_location: if response.status == 303: method = 'GET' + + try: + retries = retries.increment(method, url, response=response, _pool=self) + except MaxRetryError: + if retries.raise_on_redirect: + raise + return response + log.info("Redirecting %s -> %s" % (url, redirect_location)) return self.urlopen(method, redirect_location, body, headers, - retries - 1, redirect, assert_same_host, - timeout=timeout, pool_timeout=pool_timeout, - release_conn=release_conn, **response_kw) + retries=retries, redirect=redirect, + assert_same_host=assert_same_host, + timeout=timeout, pool_timeout=pool_timeout, + release_conn=release_conn, **response_kw) + + # Check if we should retry the HTTP response. 
+ if retries.is_forced_retry(method, status_code=response.status): + retries = retries.increment(method, url, response=response, _pool=self) + retries.sleep() + log.info("Forced retry: %s" % url) + return self.urlopen(method, url, body, headers, + retries=retries, redirect=redirect, + assert_same_host=assert_same_host, + timeout=timeout, pool_timeout=pool_timeout, + release_conn=release_conn, **response_kw) return response @@ -600,8 +634,8 @@ class HTTPSConnectionPool(HTTPConnectionPool): ConnectionCls = HTTPSConnection def __init__(self, host, port=None, - strict=False, timeout=None, maxsize=1, - block=False, headers=None, + strict=False, timeout=Timeout.DEFAULT_TIMEOUT, maxsize=1, + block=False, headers=None, retries=None, _proxy=None, _proxy_headers=None, key_file=None, cert_file=None, cert_reqs=None, ca_certs=None, ssl_version=None, @@ -609,7 +643,7 @@ class HTTPSConnectionPool(HTTPConnectionPool): **conn_kw): HTTPConnectionPool.__init__(self, host, port, strict, timeout, maxsize, - block, headers, _proxy, _proxy_headers, + block, headers, retries, _proxy, _proxy_headers, **conn_kw) self.key_file = key_file self.cert_file = cert_file @@ -640,7 +674,7 @@ class HTTPSConnectionPool(HTTPConnectionPool): set_tunnel = conn.set_tunnel except AttributeError: # Platform-specific: Python 2.6 set_tunnel = conn._set_tunnel - + if sys.version_info <= (2, 6, 4) and not self.proxy_headers: # Python 2.6.4 and older set_tunnel(self.host, self.port) else: @@ -677,6 +711,24 @@ class HTTPSConnectionPool(HTTPConnectionPool): return self._prepare_conn(conn) + def _validate_conn(self, conn): + """ + Called right before a request is made, after the socket is created. + """ + super(HTTPSConnectionPool, self)._validate_conn(conn) + + # Force connect early to allow us to validate the connection. + if not conn.sock: + conn.connect() + + if not conn.is_verified: + warnings.warn(( + 'Unverified HTTPS request is being made. ' + 'Adding certificate verification is strongly advised. 
See: ' + 'https://urllib3.readthedocs.org/en/latest/security.html ' + '(This warning will only appear once by default.)'), + InsecureRequestWarning) + def connection_from_url(url, **kw): """ @@ -693,7 +745,7 @@ def connection_from_url(url, **kw): :class:`.ConnectionPool`. Useful for specifying things like timeout, maxsize, headers, etc. - Example: :: + Example:: >>> conn = connection_from_url('http://google.com/') >>> r = conn.request('GET', '/') diff --git a/urllib3/contrib/ntlmpool.py b/urllib3/contrib/ntlmpool.py index b8cd933..c6b266f 100644 --- a/urllib3/contrib/ntlmpool.py +++ b/urllib3/contrib/ntlmpool.py @@ -1,9 +1,3 @@ -# urllib3/contrib/ntlmpool.py -# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) -# -# This module is part of urllib3 and is released under -# the MIT License: http://www.opensource.org/licenses/mit-license.php - """ NTLM authenticating pool, contributed by erikcederstran diff --git a/urllib3/contrib/pyopenssl.py b/urllib3/contrib/pyopenssl.py index 21a12c6..7a9ea2e 100644 --- a/urllib3/contrib/pyopenssl.py +++ b/urllib3/contrib/pyopenssl.py @@ -54,7 +54,6 @@ from pyasn1.type import univ, constraint from socket import _fileobject, timeout import ssl import select -from cStringIO import StringIO from .. import connection from .. import util @@ -155,196 +154,37 @@ def get_subj_alt_name(peer_cert): return dns_name -class fileobject(_fileobject): - - def _wait_for_sock(self): - rd, wd, ed = select.select([self._sock], [], [], - self._sock.gettimeout()) - if not rd: - raise timeout() - - - def read(self, size=-1): - # Use max, disallow tiny reads in a loop as they are very inefficient. - # We never leave read() with any leftover data from a new recv() call - # in our internal buffer. 
- rbufsize = max(self._rbufsize, self.default_bufsize) - # Our use of StringIO rather than lists of string objects returned by - # recv() minimizes memory usage and fragmentation that occurs when - # rbufsize is large compared to the typical return value of recv(). - buf = self._rbuf - buf.seek(0, 2) # seek end - if size < 0: - # Read until EOF - self._rbuf = StringIO() # reset _rbuf. we consume it via buf. - while True: - try: - data = self._sock.recv(rbufsize) - except OpenSSL.SSL.WantReadError: - self._wait_for_sock() - continue - if not data: - break - buf.write(data) - return buf.getvalue() - else: - # Read until size bytes or EOF seen, whichever comes first - buf_len = buf.tell() - if buf_len >= size: - # Already have size bytes in our buffer? Extract and return. - buf.seek(0) - rv = buf.read(size) - self._rbuf = StringIO() - self._rbuf.write(buf.read()) - return rv - - self._rbuf = StringIO() # reset _rbuf. we consume it via buf. - while True: - left = size - buf_len - # recv() will malloc the amount of memory given as its - # parameter even though it often returns much less data - # than that. The returned data string is short lived - # as we copy it into a StringIO and free it. This avoids - # fragmentation issues on many platforms. - try: - data = self._sock.recv(left) - except OpenSSL.SSL.WantReadError: - self._wait_for_sock() - continue - if not data: - break - n = len(data) - if n == size and not buf_len: - # Shortcut. Avoid buffer data copies when: - # - We have no data in our buffer. - # AND - # - Our call to recv returned exactly the - # number of bytes we were asked to read. 
- return data - if n == left: - buf.write(data) - del data # explicit free - break - assert n <= left, "recv(%d) returned %d bytes" % (left, n) - buf.write(data) - buf_len += n - del data # explicit free - #assert buf_len == buf.tell() - return buf.getvalue() - - def readline(self, size=-1): - buf = self._rbuf - buf.seek(0, 2) # seek end - if buf.tell() > 0: - # check if we already have it in our buffer - buf.seek(0) - bline = buf.readline(size) - if bline.endswith('\n') or len(bline) == size: - self._rbuf = StringIO() - self._rbuf.write(buf.read()) - return bline - del bline - if size < 0: - # Read until \n or EOF, whichever comes first - if self._rbufsize <= 1: - # Speed up unbuffered case - buf.seek(0) - buffers = [buf.read()] - self._rbuf = StringIO() # reset _rbuf. we consume it via buf. - data = None - recv = self._sock.recv - while True: - try: - while data != "\n": - data = recv(1) - if not data: - break - buffers.append(data) - except OpenSSL.SSL.WantReadError: - self._wait_for_sock() - continue - break - return "".join(buffers) - - buf.seek(0, 2) # seek end - self._rbuf = StringIO() # reset _rbuf. we consume it via buf. - while True: - try: - data = self._sock.recv(self._rbufsize) - except OpenSSL.SSL.WantReadError: - self._wait_for_sock() - continue - if not data: - break - nl = data.find('\n') - if nl >= 0: - nl += 1 - buf.write(data[:nl]) - self._rbuf.write(data[nl:]) - del data - break - buf.write(data) - return buf.getvalue() - else: - # Read until size bytes or \n or EOF seen, whichever comes first - buf.seek(0, 2) # seek end - buf_len = buf.tell() - if buf_len >= size: - buf.seek(0) - rv = buf.read(size) - self._rbuf = StringIO() - self._rbuf.write(buf.read()) - return rv - self._rbuf = StringIO() # reset _rbuf. we consume it via buf. - while True: - try: - data = self._sock.recv(self._rbufsize) - except OpenSSL.SSL.WantReadError: - self._wait_for_sock() - continue - if not data: - break - left = size - buf_len - # did we just receive a newline? 
- nl = data.find('\n', 0, left) - if nl >= 0: - nl += 1 - # save the excess data to _rbuf - self._rbuf.write(data[nl:]) - if buf_len: - buf.write(data[:nl]) - break - else: - # Shortcut. Avoid data copy through buf when returning - # a substring of our first recv(). - return data[:nl] - n = len(data) - if n == size and not buf_len: - # Shortcut. Avoid data copy through buf when - # returning exactly all of our first recv(). - return data - if n >= left: - buf.write(data[:left]) - self._rbuf.write(data[left:]) - break - buf.write(data) - buf_len += n - #assert buf_len == buf.tell() - return buf.getvalue() - - class WrappedSocket(object): '''API-compatibility wrapper for Python OpenSSL's Connection-class.''' - def __init__(self, connection, socket): + def __init__(self, connection, socket, suppress_ragged_eofs=True): self.connection = connection self.socket = socket + self.suppress_ragged_eofs = suppress_ragged_eofs def fileno(self): return self.socket.fileno() def makefile(self, mode, bufsize=-1): - return fileobject(self.connection, mode, bufsize) + return _fileobject(self, mode, bufsize) + + def recv(self, *args, **kwargs): + try: + data = self.connection.recv(*args, **kwargs) + except OpenSSL.SSL.SysCallError as e: + if self.suppress_ragged_eofs and e.args == (-1, 'Unexpected EOF'): + return b'' + else: + raise + except OpenSSL.SSL.WantReadError: + rd, wd, ed = select.select( + [self.socket], [], [], self.socket.gettimeout()) + if not rd: + raise timeout() + else: + return self.recv(*args, **kwargs) + else: + return data def settimeout(self, timeout): return self.socket.settimeout(timeout) diff --git a/urllib3/exceptions.py b/urllib3/exceptions.py index b4df831..fff8bfa 100644 --- a/urllib3/exceptions.py +++ b/urllib3/exceptions.py @@ -1,9 +1,3 @@ -# urllib3/exceptions.py -# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) -# -# This module is part of urllib3 and is released under -# the MIT License: 
http://www.opensource.org/licenses/mit-license.php - ## Base Exceptions @@ -11,6 +5,11 @@ class HTTPError(Exception): "Base exception used by this module." pass +class HTTPWarning(Warning): + "Base warning used by this module." + pass + + class PoolError(HTTPError): "Base exception for errors caused within a pool." @@ -44,16 +43,20 @@ class ProxyError(HTTPError): pass -class ConnectionError(HTTPError): - "Raised when a normal connection fails." +class DecodeError(HTTPError): + "Raised when automatic decoding based on Content-Type fails." pass -class DecodeError(HTTPError): - "Raised when automatic decoding based on Content-Type fails." +class ProtocolError(HTTPError): + "Raised when something unexpected happens mid-request/response." pass +#: Renamed to ProtocolError but aliased for backwards compatibility. +ConnectionError = ProtocolError + + ## Leaf Exceptions class MaxRetryError(RequestError): @@ -64,7 +67,7 @@ class MaxRetryError(RequestError): message = "Max retries exceeded with url: %s" % url if reason: - message += " (Caused by %s: %s)" % (type(reason), reason) + message += " (Caused by %r)" % reason else: message += " (Caused by redirect)" @@ -116,7 +119,12 @@ class ClosedPoolError(PoolError): pass -class LocationParseError(ValueError, HTTPError): +class LocationValueError(ValueError, HTTPError): + "Raised when there is something wrong with a given URL input." + pass + + +class LocationParseError(LocationValueError): "Raised when get_host or similar fails to parse the URL input." def __init__(self, location): @@ -124,3 +132,8 @@ class LocationParseError(ValueError, HTTPError): HTTPError.__init__(self, message) self.location = location + + +class InsecureRequestWarning(HTTPWarning): + "Warned when making an unverified HTTPS request." 
+ pass diff --git a/urllib3/fields.py b/urllib3/fields.py index dceafb4..c853f8d 100644 --- a/urllib3/fields.py +++ b/urllib3/fields.py @@ -1,9 +1,3 @@ -# urllib3/fields.py -# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) -# -# This module is part of urllib3 and is released under -# the MIT License: http://www.opensource.org/licenses/mit-license.php - import email.utils import mimetypes @@ -81,7 +75,7 @@ class RequestField(object): Supports constructing :class:`~urllib3.fields.RequestField` from parameter of key/value strings AND key/filetuple. A filetuple is a (filename, data, MIME type) tuple where the MIME type is optional. - For example: :: + For example:: 'foo': 'bar', 'fakefile': ('foofile.txt', 'contents of foofile'), diff --git a/urllib3/filepost.py b/urllib3/filepost.py index c3db30c..0fbf488 100644 --- a/urllib3/filepost.py +++ b/urllib3/filepost.py @@ -1,9 +1,3 @@ -# urllib3/filepost.py -# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) -# -# This module is part of urllib3 and is released under -# the MIT License: http://www.opensource.org/licenses/mit-license.php - import codecs from uuid import uuid4 diff --git a/urllib3/packages/ordered_dict.py b/urllib3/packages/ordered_dict.py index 7f8ee15..4479363 100644 --- a/urllib3/packages/ordered_dict.py +++ b/urllib3/packages/ordered_dict.py @@ -2,7 +2,6 @@ # Passes Python2.7's test suite and incorporates all the latest updates. # Copyright 2009 Raymond Hettinger, released under the MIT License. 
# http://code.activestate.com/recipes/576693/ - try: from thread import get_ident as _get_ident except ImportError: diff --git a/urllib3/poolmanager.py b/urllib3/poolmanager.py index 3945f5d..515dc96 100644 --- a/urllib3/poolmanager.py +++ b/urllib3/poolmanager.py @@ -1,9 +1,3 @@ -# urllib3/poolmanager.py -# Copyright 2008-2014 Andrey Petrov and contributors (see CONTRIBUTORS.txt) -# -# This module is part of urllib3 and is released under -# the MIT License: http://www.opensource.org/licenses/mit-license.php - import logging try: # Python 3 @@ -14,8 +8,10 @@ except ImportError: from ._collections import RecentlyUsedContainer from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool from .connectionpool import port_by_scheme +from .exceptions import LocationValueError from .request import RequestMethods -from .util import parse_url +from .util.url import parse_url +from .util.retry import Retry __all__ = ['PoolManager', 'ProxyManager', 'proxy_from_url'] @@ -49,7 +45,7 @@ class PoolManager(RequestMethods): Additional parameters are used to create fresh :class:`urllib3.connectionpool.ConnectionPool` instances. - Example: :: + Example:: >>> manager = PoolManager(num_pools=2) >>> r = manager.request('GET', 'http://google.com/') @@ -102,10 +98,11 @@ class PoolManager(RequestMethods): ``urllib3.connectionpool.port_by_scheme``. 
""" - scheme = scheme or 'http' + if not host: + raise LocationValueError("No host specified.") + scheme = scheme or 'http' port = port or port_by_scheme.get(scheme, 80) - pool_key = (scheme, host, port) with self.pools.lock: @@ -118,6 +115,7 @@ class PoolManager(RequestMethods): # Make a fresh ConnectionPool of the desired type pool = self._new_pool(scheme, host, port) self.pools[pool_key] = pool + return pool def connection_from_url(self, url): @@ -165,9 +163,14 @@ class PoolManager(RequestMethods): if response.status == 303: method = 'GET' - log.info("Redirecting %s -> %s" % (url, redirect_location)) - kw['retries'] = kw.get('retries', 3) - 1 # Persist retries countdown + retries = kw.get('retries') + if not isinstance(retries, Retry): + retries = Retry.from_int(retries, redirect=redirect) + + kw['retries'] = retries.increment(method, redirect_location) kw['redirect'] = redirect + + log.info("Redirecting %s -> %s" % (url, redirect_location)) return self.urlopen(method, redirect_location, **kw) @@ -208,12 +211,16 @@ class ProxyManager(PoolManager): if not proxy.port: port = port_by_scheme.get(proxy.scheme, 80) proxy = proxy._replace(port=port) + + assert proxy.scheme in ("http", "https"), \ + 'Not supported proxy scheme %s' % proxy.scheme + self.proxy = proxy self.proxy_headers = proxy_headers or {} - assert self.proxy.scheme in ("http", "https"), \ - 'Not supported proxy scheme %s' % self.proxy.scheme + connection_pool_kw['_proxy'] = self.proxy connection_pool_kw['_proxy_headers'] = self.proxy_headers + super(ProxyManager, self).__init__( num_pools, headers, **connection_pool_kw) @@ -248,10 +255,10 @@ class ProxyManager(PoolManager): # For proxied HTTPS requests, httplib sets the necessary headers # on the CONNECT to the proxy. For HTTP, we'll definitely # need to set 'Host' at the very least. 
- kw['headers'] = self._set_proxy_headers(url, kw.get('headers', - self.headers)) + headers = kw.get('headers', self.headers) + kw['headers'] = self._set_proxy_headers(url, headers) - return super(ProxyManager, self).urlopen(method, url, redirect, **kw) + return super(ProxyManager, self).urlopen(method, url, redirect=redirect, **kw) def proxy_from_url(url, **kw): diff --git a/urllib3/request.py b/urllib3/request.py index 7a46f1b..51fe238 100644 --- a/urllib3/request.py +++ b/urllib3/request.py @@ -1,9 +1,3 @@ -# urllib3/request.py -# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) -# -# This module is part of urllib3 and is released under -# the MIT License: http://www.opensource.org/licenses/mit-license.php - try: from urllib.parse import urlencode except ImportError: @@ -105,7 +99,7 @@ class RequestMethods(object): Supports an optional ``fields`` parameter of key/value strings AND key/filetuple. A filetuple is a (filename, data, MIME type) tuple where - the MIME type is optional. For example: :: + the MIME type is optional. 
For example:: fields = { 'foo': 'bar', diff --git a/urllib3/response.py b/urllib3/response.py index 13ffba4..7e0d47f 100644 --- a/urllib3/response.py +++ b/urllib3/response.py @@ -1,18 +1,13 @@ -# urllib3/response.py -# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) -# -# This module is part of urllib3 and is released under -# the MIT License: http://www.opensource.org/licenses/mit-license.php - - import zlib import io from socket import timeout as SocketTimeout from ._collections import HTTPHeaderDict -from .exceptions import DecodeError, ReadTimeoutError +from .exceptions import ProtocolError, DecodeError, ReadTimeoutError from .packages.six import string_types as basestring, binary_type -from .util import is_fp_closed +from .connection import HTTPException, BaseSSLError +from .util.response import is_fp_closed + class DeflateDecoder(object): @@ -88,11 +83,14 @@ class HTTPResponse(io.IOBase): self.decode_content = decode_content self._decoder = None - self._body = body if body and isinstance(body, basestring) else None + self._body = None self._fp = None self._original_response = original_response self._fp_bytes_read = 0 + if body and isinstance(body, (basestring, binary_type)): + self._body = body + self._pool = pool self._connection = connection @@ -199,6 +197,19 @@ class HTTPResponse(io.IOBase): # there is yet no clean way to get at it from this context. raise ReadTimeoutError(self._pool, None, 'Read timed out.') + except BaseSSLError as e: + # FIXME: Is there a better way to differentiate between SSLErrors? + if not 'read operation timed out' in str(e): # Defensive: + # This shouldn't happen but just in case we're missing an edge + # case, let's avoid swallowing SSL errors. + raise + + raise ReadTimeoutError(self._pool, None, 'Read timed out.') + + except HTTPException as e: + # This includes IncompleteRead. 
+ raise ProtocolError('Connection broken: %r' % e, e) + self._fp_bytes_read += len(data) try: diff --git a/urllib3/util/__init__.py b/urllib3/util/__init__.py index a40185e..8becc81 100644 --- a/urllib3/util/__init__.py +++ b/urllib3/util/__init__.py @@ -1,9 +1,4 @@ -# urllib3/util/__init__.py -# Copyright 2008-2014 Andrey Petrov and contributors (see CONTRIBUTORS.txt) -# -# This module is part of urllib3 and is released under -# the MIT License: http://www.opensource.org/licenses/mit-license.php - +# For backwards compatibility, provide imports that used to be here. from .connection import is_connection_dropped from .request import make_headers from .response import is_fp_closed @@ -19,6 +14,8 @@ from .timeout import ( current_time, Timeout, ) + +from .retry import Retry from .url import ( get_host, parse_url, diff --git a/urllib3/util/connection.py b/urllib3/util/connection.py index c67ef04..062ee9d 100644 --- a/urllib3/util/connection.py +++ b/urllib3/util/connection.py @@ -1,4 +1,4 @@ -from socket import error as SocketError +import socket try: from select import poll, POLLIN except ImportError: # `poll` doesn't exist on OSX and other platforms @@ -31,7 +31,7 @@ def is_connection_dropped(conn): # Platform-specific try: return select([sock], [], [], 0.0)[0] - except SocketError: + except socket.error: return True # This version is better on platforms that support it. @@ -41,3 +41,55 @@ def is_connection_dropped(conn): # Platform-specific if fno == sock.fileno(): # Either data is buffered (bad), or the connection is dropped. return True + + +# This function is copied from socket.py in the Python 2.7 standard +# library test suite. Added to its signature is only `socket_options`. +def create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, + source_address=None, socket_options=None): + """Connect to *address* and return the socket object. + + Convenience function. Connect to *address* (a 2-tuple ``(host, + port)``) and return the socket object. 
Passing the optional + *timeout* parameter will set the timeout on the socket instance + before attempting to connect. If no *timeout* is supplied, the + global default timeout setting returned by :func:`getdefaulttimeout` + is used. If *source_address* is set it must be a tuple of (host, port) + for the socket to bind as a source address before making the connection. + An host of '' or port 0 tells the OS to use the default. + """ + + host, port = address + err = None + for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM): + af, socktype, proto, canonname, sa = res + sock = None + try: + sock = socket.socket(af, socktype, proto) + if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT: + sock.settimeout(timeout) + if source_address: + sock.bind(source_address) + # If provided, set socket level options before connecting. + # This is the only addition urllib3 makes to this function. + _set_socket_options(sock, socket_options) + sock.connect(sa) + return sock + + except socket.error as _: + err = _ + if sock is not None: + sock.close() + + if err is not None: + raise err + else: + raise socket.error("getaddrinfo returns an empty list") + + +def _set_socket_options(sock, options): + if options is None: + return + + for opt in options: + sock.setsockopt(*opt) diff --git a/urllib3/util/request.py b/urllib3/util/request.py index bfd7a98..bc64f6b 100644 --- a/urllib3/util/request.py +++ b/urllib3/util/request.py @@ -1,7 +1,6 @@ from base64 import b64encode -from ..packages import six - +from ..packages.six import b ACCEPT_ENCODING = 'gzip,deflate' @@ -29,13 +28,13 @@ def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, auth header. :param proxy_basic_auth: - Colon-separated username:password string for - 'proxy-authorization: basic ...' auth header. + Colon-separated username:password string for 'proxy-authorization: basic ...' + auth header. :param disable_cache: If ``True``, adds 'cache-control: no-cache' header. 
- Example: :: + Example:: >>> make_headers(keep_alive=True, user_agent="Batman/1.0") {'connection': 'keep-alive', 'user-agent': 'Batman/1.0'} @@ -60,11 +59,11 @@ def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, if basic_auth: headers['authorization'] = 'Basic ' + \ - b64encode(six.b(basic_auth)).decode('utf-8') + b64encode(b(basic_auth)).decode('utf-8') if proxy_basic_auth: headers['proxy-authorization'] = 'Basic ' + \ - b64encode(six.b(proxy_basic_auth)).decode('utf-8') + b64encode(b(proxy_basic_auth)).decode('utf-8') if disable_cache: headers['cache-control'] = 'no-cache' diff --git a/urllib3/util/retry.py b/urllib3/util/retry.py new file mode 100644 index 0000000..9013197 --- /dev/null +++ b/urllib3/util/retry.py @@ -0,0 +1,279 @@ +import time +import logging + +from ..exceptions import ( + ProtocolError, + ConnectTimeoutError, + ReadTimeoutError, + MaxRetryError, +) +from ..packages import six + + +log = logging.getLogger(__name__) + + +class Retry(object): + """ Retry configuration. + + Each retry attempt will create a new Retry object with updated values, so + they can be safely reused. + + Retries can be defined as a default for a pool:: + + retries = Retry(connect=5, read=2, redirect=5) + http = PoolManager(retries=retries) + response = http.request('GET', 'http://example.com/') + + Or per-request (which overrides the default for the pool):: + + response = http.request('GET', 'http://example.com/', retries=Retry(10)) + + Retries can be disabled by passing ``False``:: + + response = http.request('GET', 'http://example.com/', retries=False) + + Errors will be wrapped in :class:`~urllib3.exceptions.MaxRetryError` unless + retries are disabled, in which case the causing exception will be raised. + + + :param int total: + Total number of retries to allow. Takes precedence over other counts. + + Set to ``None`` to remove this constraint and fall back on other + counts. 
It's a good idea to set this to some sensibly-high value to + account for unexpected edge cases and avoid infinite retry loops. + + Set to ``0`` to fail on the first retry. + + Set to ``False`` to disable and imply ``raise_on_redirect=False``. + + :param int connect: + How many connection-related errors to retry on. + + These are errors raised before the request is sent to the remote server, + which we assume has not triggered the server to process the request. + + Set to ``0`` to fail on the first retry of this type. + + :param int read: + How many times to retry on read errors. + + These errors are raised after the request was sent to the server, so the + request may have side-effects. + + Set to ``0`` to fail on the first retry of this type. + + :param int redirect: + How many redirects to perform. Limit this to avoid infinite redirect + loops. + + A redirect is a HTTP response with a status code 301, 302, 303, 307 or + 308. + + Set to ``0`` to fail on the first retry of this type. + + Set to ``False`` to disable and imply ``raise_on_redirect=False``. + + :param iterable method_whitelist: + Set of uppercased HTTP method verbs that we should retry on. + + By default, we only retry on methods which are considered to be + indempotent (multiple requests with the same parameters end with the + same state). See :attr:`Retry.DEFAULT_METHOD_WHITELIST`. + + :param iterable status_forcelist: + A set of HTTP status codes that we should force a retry on. + + By default, this is disabled with ``None``. + + :param float backoff_factor: + A backoff factor to apply between attempts. urllib3 will sleep for:: + + {backoff factor} * (2 ^ ({number of total retries} - 1)) + + seconds. If the backoff_factor is 0.1, then :func:`.sleep` will sleep + for [0.1s, 0.2s, 0.4s, ...] between retries. It will never be longer + than :attr:`Retry.MAX_BACKOFF`. + + By default, backoff is disabled (set to 0). 
+ + :param bool raise_on_redirect: Whether, if the number of redirects is + exhausted, to raise a MaxRetryError, or to return a response with a + response code in the 3xx range. + """ + + DEFAULT_METHOD_WHITELIST = frozenset([ + 'HEAD', 'GET', 'PUT', 'DELETE', 'OPTIONS', 'TRACE']) + + #: Maximum backoff time. + BACKOFF_MAX = 120 + + def __init__(self, total=10, connect=None, read=None, redirect=None, + method_whitelist=DEFAULT_METHOD_WHITELIST, status_forcelist=None, + backoff_factor=0, raise_on_redirect=True, _observed_errors=0): + + self.total = total + self.connect = connect + self.read = read + + if redirect is False or total is False: + redirect = 0 + raise_on_redirect = False + + self.redirect = redirect + self.status_forcelist = status_forcelist or set() + self.method_whitelist = method_whitelist + self.backoff_factor = backoff_factor + self.raise_on_redirect = raise_on_redirect + self._observed_errors = _observed_errors # TODO: use .history instead? + + def new(self, **kw): + params = dict( + total=self.total, + connect=self.connect, read=self.read, redirect=self.redirect, + method_whitelist=self.method_whitelist, + status_forcelist=self.status_forcelist, + backoff_factor=self.backoff_factor, + raise_on_redirect=self.raise_on_redirect, + _observed_errors=self._observed_errors, + ) + params.update(kw) + return type(self)(**params) + + @classmethod + def from_int(cls, retries, redirect=True, default=None): + """ Backwards-compatibility for the old retries format.""" + if retries is None: + retries = default if default is not None else cls.DEFAULT + + if isinstance(retries, Retry): + return retries + + redirect = bool(redirect) and None + new_retries = cls(retries, redirect=redirect) + log.debug("Converted retries value: %r -> %r" % (retries, new_retries)) + return new_retries + + def get_backoff_time(self): + """ Formula for computing the current backoff + + :rtype: float + """ + if self._observed_errors <= 1: + return 0 + + backoff_value = 
self.backoff_factor * (2 ** (self._observed_errors - 1)) + return min(self.BACKOFF_MAX, backoff_value) + + def sleep(self): + """ Sleep between retry attempts using an exponential backoff. + + By default, the backoff factor is 0 and this method will return + immediately. + """ + backoff = self.get_backoff_time() + if backoff <= 0: + return + time.sleep(backoff) + + def _is_connection_error(self, err): + """ Errors when we're fairly sure that the server did not receive the + request, so it should be safe to retry. + """ + return isinstance(err, ConnectTimeoutError) + + def _is_read_error(self, err): + """ Errors that occur after the request has been started, so we can't + assume that the server did not process any of it. + """ + return isinstance(err, (ReadTimeoutError, ProtocolError)) + + def is_forced_retry(self, method, status_code): + """ Is this method/response retryable? (Based on method/codes whitelists) + """ + if self.method_whitelist and method.upper() not in self.method_whitelist: + return False + + return self.status_forcelist and status_code in self.status_forcelist + + def is_exhausted(self): + """ Are we out of retries? + """ + retry_counts = (self.total, self.connect, self.read, self.redirect) + retry_counts = list(filter(None, retry_counts)) + if not retry_counts: + return False + + return min(retry_counts) < 0 + + def increment(self, method=None, url=None, response=None, error=None, _pool=None, _stacktrace=None): + """ Return a new Retry object with incremented retry counters. + + :param response: A response object, or None, if the server did not + return a response. + :type response: :class:`~urllib3.response.HTTPResponse` + :param Exception error: An error encountered during the request, or + None if the response was received successfully. + + :return: A new ``Retry`` object. + """ + if self.total is False and error: + # Disabled, indicate to re-raise the error. 
+ raise six.reraise(type(error), error, _stacktrace) + + total = self.total + if total is not None: + total -= 1 + + _observed_errors = self._observed_errors + connect = self.connect + read = self.read + redirect = self.redirect + + if error and self._is_connection_error(error): + # Connect retry? + if connect is False: + raise six.reraise(type(error), error, _stacktrace) + elif connect is not None: + connect -= 1 + _observed_errors += 1 + + elif error and self._is_read_error(error): + # Read retry? + if read is False: + raise six.reraise(type(error), error, _stacktrace) + elif read is not None: + read -= 1 + _observed_errors += 1 + + elif response and response.get_redirect_location(): + # Redirect retry? + if redirect is not None: + redirect -= 1 + + else: + # FIXME: Nothing changed, scenario doesn't make sense. + _observed_errors += 1 + + new_retry = self.new( + total=total, + connect=connect, read=read, redirect=redirect, + _observed_errors=_observed_errors) + + if new_retry.is_exhausted(): + raise MaxRetryError(_pool, url, error) + + log.debug("Incremented Retry for (url='%s'): %r" % (url, new_retry)) + + return new_retry + + + def __repr__(self): + return ('{cls.__name__}(total={self.total}, connect={self.connect}, ' + 'read={self.read}, redirect={self.redirect})').format( + cls=type(self), self=self) + + +# For backwards compatibility (equivalent to pre-v1.9): +Retry.DEFAULT = Retry(3) diff --git a/urllib3/util/ssl_.py b/urllib3/util/ssl_.py index dee4b87..9cfe2d2 100644 --- a/urllib3/util/ssl_.py +++ b/urllib3/util/ssl_.py @@ -34,10 +34,9 @@ def assert_fingerprint(cert, fingerprint): } fingerprint = fingerprint.replace(':', '').lower() + digest_length, odd = divmod(len(fingerprint), 2) - digest_length, rest = divmod(len(fingerprint), 2) - - if rest or digest_length not in hashfunc_map: + if odd or digest_length not in hashfunc_map: raise SSLError('Fingerprint is of invalid length.') # We need encode() here for py32; works on py2 and p33. 
diff --git a/urllib3/util/timeout.py b/urllib3/util/timeout.py index aaadc12..ea7027f 100644 --- a/urllib3/util/timeout.py +++ b/urllib3/util/timeout.py @@ -1,32 +1,49 @@ +# The default socket timeout, used by httplib to indicate that no timeout was +# specified by the user from socket import _GLOBAL_DEFAULT_TIMEOUT import time from ..exceptions import TimeoutStateError +# A sentinel value to indicate that no timeout was specified by the user in +# urllib3 +_Default = object() def current_time(): """ - Retrieve the current time, this function is mocked out in unit testing. + Retrieve the current time. This function is mocked out in unit testing. """ return time.time() -_Default = object() -# The default timeout to use for socket connections. This is the attribute used -# by httplib to define the default timeout +class Timeout(object): + """ Timeout configuration. + Timeouts can be defined as a default for a pool:: -class Timeout(object): - """ - Utility object for storing timeout values. + timeout = Timeout(connect=2.0, read=7.0) + http = PoolManager(timeout=timeout) + response = http.request('GET', 'http://example.com/') + + Or per-request (which overrides the default for the pool):: + + response = http.request('GET', 'http://example.com/', timeout=Timeout(10)) - Example usage: + Timeouts can be disabled by setting all the parameters to ``None``:: + + no_timeout = Timeout(connect=None, read=None) + response = http.request('GET', 'http://example.com/, timeout=no_timeout) + + + :param total: + This combines the connect and read timeouts into one; the read timeout + will be set to the time leftover from the connect attempt. In the + event that both a connect timeout and a total are specified, or a read + timeout and a total are specified, the shorter timeout will be applied. - .. code-block:: python + Defaults to None. - timeout = urllib3.util.Timeout(connect=2.0, read=7.0) - pool = HTTPConnectionPool('www.google.com', 80, timeout=timeout) - pool.request(...) 
# Etc, etc + :type total: integer, float, or None :param connect: The maximum amount of time to wait for a connection attempt to a server @@ -47,25 +64,15 @@ class Timeout(object): :type read: integer, float, or None - :param total: - This combines the connect and read timeouts into one; the read timeout - will be set to the time leftover from the connect attempt. In the - event that both a connect timeout and a total are specified, or a read - timeout and a total are specified, the shorter timeout will be applied. - - Defaults to None. - - :type total: integer, float, or None - .. note:: Many factors can affect the total amount of time for urllib3 to return - an HTTP response. Specifically, Python's DNS resolver does not obey the - timeout specified on the socket. Other factors that can affect total - request time include high CPU load, high swap, the program running at a - low priority level, or other behaviors. The observed running time for - urllib3 to return a response may be greater than the value passed to - `total`. + an HTTP response. + + For example, Python's DNS resolver does not obey the timeout specified + on the socket. Other factors that can affect total request time include + high CPU load, high swap, the program running at a low priority level, + or other behaviors. In addition, the read and total timeouts only measure the time between read operations on the socket connecting the client and the server, @@ -73,8 +80,8 @@ class Timeout(object): response. For most requests, the timeout is raised because the server has not sent the first byte in the specified time. This is not always the case; if a server streams one byte every fifteen seconds, a timeout - of 20 seconds will not ever trigger, even though the request will - take several minutes to complete. + of 20 seconds will not trigger, even though the request will take + several minutes to complete. 
If your goal is to cut off any request after a set amount of wall clock time, consider having a second "watcher" thread to cut off a slow diff --git a/urllib3/util/url.py b/urllib3/util/url.py index 122108b..487d456 100644 --- a/urllib3/util/url.py +++ b/urllib3/util/url.py @@ -2,6 +2,7 @@ from collections import namedtuple from ..exceptions import LocationParseError + url_attrs = ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment'] @@ -47,7 +48,7 @@ def split_first(s, delims): If not found, then the first part is the full input string. - Example: :: + Example:: >>> split_first('foo/bar?baz', '?/=') ('foo', 'bar?baz', '/') @@ -80,7 +81,7 @@ def parse_url(url): Partly backwards-compatible with :mod:`urlparse`. - Example: :: + Example:: >>> parse_url('http://google.com/mail/') Url(scheme='http', host='google.com', port=None, path='/', ...) @@ -95,6 +96,10 @@ def parse_url(url): # Additionally, this implementations does silly things to be optimal # on CPython. + if not url: + # Empty + return Url() + scheme = None auth = None host = None -- cgit v1.2.3 From 54bdd56778a37ea9d56d451d4ae49b99cbbfceaa Mon Sep 17 00:00:00 2001 From: SVN-Git Migration Date: Thu, 8 Oct 2015 13:19:39 -0700 Subject: Imported Upstream version 1.9.1 --- CHANGES.rst | 21 + CONTRIBUTORS.txt | 10 + MANIFEST.in | 5 +- Makefile | 1 + PKG-INFO | 26 +- README.rst | 3 + dev-requirements.txt | 4 +- docs/Makefile | 130 +++++ docs/README | 14 + docs/collections.rst | 13 + docs/conf.py | 232 ++++++++ docs/contrib.rst | 14 + docs/doc-requirements.txt | 12 + docs/exceptions.rst | 7 + docs/helpers.rst | 55 ++ docs/index.rst | 340 +++++++++++ docs/make.bat | 170 ++++++ docs/managers.rst | 62 ++ docs/pools.rst | 71 +++ docs/security.rst | 161 ++++++ dummyserver/__init__.pyc | Bin 0 -> 141 bytes dummyserver/handlers.pyc | Bin 0 -> 9742 bytes dummyserver/proxy.pyc | Bin 0 -> 4740 bytes dummyserver/server.pyc | Bin 0 -> 5992 bytes dummyserver/testcase.pyc | Bin 0 -> 5450 bytes test/__init__.py | 92 +++ 
test/__init__.pyc | Bin 0 -> 3946 bytes test/benchmark.py | 77 +++ test/contrib/__init__.py | 0 test/contrib/__init__.pyc | Bin 0 -> 142 bytes test/contrib/test_pyopenssl.py | 23 + test/contrib/test_pyopenssl.pyc | Bin 0 -> 1143 bytes test/port_helpers.py | 100 ++++ test/port_helpers.pyc | Bin 0 -> 5719 bytes test/test_collections.pyc | Bin 0 -> 6842 bytes test/test_compatibility.pyc | Bin 0 -> 1372 bytes test/test_connectionpool.pyc | Bin 0 -> 8862 bytes test/test_exceptions.pyc | Bin 0 -> 1931 bytes test/test_fields.pyc | Bin 0 -> 2739 bytes test/test_filepost.pyc | Bin 0 -> 4916 bytes test/test_poolmanager.pyc | Bin 0 -> 2499 bytes test/test_proxymanager.pyc | Bin 0 -> 1670 bytes test/test_response.py | 31 + test/test_response.pyc | Bin 0 -> 14619 bytes test/test_retry.pyc | Bin 0 -> 6491 bytes test/test_util.py | 31 + test/test_util.pyc | Bin 0 -> 15036 bytes test/with_dummyserver/__init__.py | 0 test/with_dummyserver/__init__.pyc | Bin 0 -> 151 bytes test/with_dummyserver/test_connectionpool.py | 706 +++++++++++++++++++++++ test/with_dummyserver/test_connectionpool.pyc | Bin 0 -> 27640 bytes test/with_dummyserver/test_https.py | 374 ++++++++++++ test/with_dummyserver/test_https.pyc | Bin 0 -> 15651 bytes test/with_dummyserver/test_poolmanager.py | 136 +++++ test/with_dummyserver/test_poolmanager.pyc | Bin 0 -> 5591 bytes test/with_dummyserver/test_proxy_poolmanager.py | 263 +++++++++ test/with_dummyserver/test_proxy_poolmanager.pyc | Bin 0 -> 9891 bytes test/with_dummyserver/test_socketlevel.py | 544 +++++++++++++++++ test/with_dummyserver/test_socketlevel.pyc | Bin 0 -> 18715 bytes urllib3.egg-info/PKG-INFO | 26 +- urllib3.egg-info/SOURCES.txt | 50 ++ urllib3/__init__.py | 4 +- urllib3/connection.py | 43 +- urllib3/connectionpool.py | 6 +- urllib3/contrib/pyopenssl.py | 36 +- urllib3/exceptions.py | 21 +- urllib3/response.py | 15 +- urllib3/util/connection.py | 8 +- urllib3/util/response.py | 17 +- urllib3/util/retry.py | 2 +- 70 files changed, 3919 
insertions(+), 37 deletions(-) create mode 100644 docs/Makefile create mode 100644 docs/README create mode 100644 docs/collections.rst create mode 100644 docs/conf.py create mode 100644 docs/contrib.rst create mode 100644 docs/doc-requirements.txt create mode 100644 docs/exceptions.rst create mode 100644 docs/helpers.rst create mode 100644 docs/index.rst create mode 100644 docs/make.bat create mode 100644 docs/managers.rst create mode 100644 docs/pools.rst create mode 100644 docs/security.rst create mode 100644 dummyserver/__init__.pyc create mode 100644 dummyserver/handlers.pyc create mode 100644 dummyserver/proxy.pyc create mode 100644 dummyserver/server.pyc create mode 100644 dummyserver/testcase.pyc create mode 100644 test/__init__.py create mode 100644 test/__init__.pyc create mode 100644 test/benchmark.py create mode 100644 test/contrib/__init__.py create mode 100644 test/contrib/__init__.pyc create mode 100644 test/contrib/test_pyopenssl.py create mode 100644 test/contrib/test_pyopenssl.pyc create mode 100644 test/port_helpers.py create mode 100644 test/port_helpers.pyc create mode 100644 test/test_collections.pyc create mode 100644 test/test_compatibility.pyc create mode 100644 test/test_connectionpool.pyc create mode 100644 test/test_exceptions.pyc create mode 100644 test/test_fields.pyc create mode 100644 test/test_filepost.pyc create mode 100644 test/test_poolmanager.pyc create mode 100644 test/test_proxymanager.pyc create mode 100644 test/test_response.pyc create mode 100644 test/test_retry.pyc create mode 100644 test/test_util.pyc create mode 100644 test/with_dummyserver/__init__.py create mode 100644 test/with_dummyserver/__init__.pyc create mode 100644 test/with_dummyserver/test_connectionpool.py create mode 100644 test/with_dummyserver/test_connectionpool.pyc create mode 100644 test/with_dummyserver/test_https.py create mode 100644 test/with_dummyserver/test_https.pyc create mode 100644 test/with_dummyserver/test_poolmanager.py create mode 100644 
test/with_dummyserver/test_poolmanager.pyc create mode 100644 test/with_dummyserver/test_proxy_poolmanager.py create mode 100644 test/with_dummyserver/test_proxy_poolmanager.pyc create mode 100644 test/with_dummyserver/test_socketlevel.py create mode 100644 test/with_dummyserver/test_socketlevel.pyc diff --git a/CHANGES.rst b/CHANGES.rst index 9ada9c2..dd2cd2d 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,27 @@ Changes ======= +1.9.1 (2014-09-13) +++++++++++++++++++ + +* Apply socket arguments before binding. (Issue #427) + +* More careful checks if fp-like object is closed. (Issue #435) + +* Fixed packaging issues of some development-related files not + getting included. (Issue #440) + +* Allow performing *only* fingerprint verification. (Issue #444) + +* Emit ``SecurityWarning`` if system clock is waaay off. (Issue #445) + +* Fixed PyOpenSSL compatibility with PyPy. (Issue #450) + +* Fixed ``BrokenPipeError`` and ``ConnectionError`` handling in Py3. + (Issue #443) + + + 1.9 (2014-07-04) ++++++++++++++++ diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt index e6178f1..97f3014 100644 --- a/CONTRIBUTORS.txt +++ b/CONTRIBUTORS.txt @@ -117,5 +117,15 @@ In chronological order: * Arthur Grunseid * source_address support and tests (with https://github.com/bui) +* Ian Cordasco + * PEP8 Compliance and Linting + * Add ability to pass socket options to an HTTP Connection + +* Erik Tollerud + * Support for standard library io module. 
+ +* Krishna Prasad + * Google App Engine documentation + * [Your name or handle] <[email or website]> * [Brief summary of your changes] diff --git a/MANIFEST.in b/MANIFEST.in index 6b37d64..4edfedd 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,2 +1,5 @@ include README.rst CHANGES.rst LICENSE.txt CONTRIBUTORS.txt dev-requirements.txt Makefile -recursive-include dummyserver *.* +recursive-include dummyserver * +recursive-include test * +recursive-include docs * +recursive-exclude docs/_build * diff --git a/Makefile b/Makefile index a6cdcfb..b692b12 100644 --- a/Makefile +++ b/Makefile @@ -30,6 +30,7 @@ clean: find . -name "*.py[oc]" -delete find . -name "__pycache__" -delete rm -f $(REQUIREMENTS_OUT) + rm -rf docs/_build test: requirements nosetests diff --git a/PKG-INFO b/PKG-INFO index 168944c..964cd4b 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 1.1 Name: urllib3 -Version: 1.9 +Version: 1.9.1 Summary: HTTP library with thread-safe connection pooling, file post, and more. Home-page: http://urllib3.readthedocs.org/ Author: Andrey Petrov @@ -13,6 +13,9 @@ Description: ======= .. image:: https://travis-ci.org/shazow/urllib3.png?branch=master :target: https://travis-ci.org/shazow/urllib3 + .. image:: https://www.bountysource.com/badge/tracker?tracker_id=192525 + :target: https://www.bountysource.com/trackers/192525-urllib3?utm_source=192525&utm_medium=shield&utm_campaign=TRACKER_BADGE + Highlights ========== @@ -153,6 +156,27 @@ Description: ======= Changes ======= + 1.9.1 (2014-09-13) + ++++++++++++++++++ + + * Apply socket arguments before binding. (Issue #427) + + * More careful checks if fp-like object is closed. (Issue #435) + + * Fixed packaging issues of some development-related files not + getting included. (Issue #440) + + * Allow performing *only* fingerprint verification. (Issue #444) + + * Emit ``SecurityWarning`` if system clock is waaay off. (Issue #445) + + * Fixed PyOpenSSL compatibility with PyPy. 
(Issue #450) + + * Fixed ``BrokenPipeError`` and ``ConnectionError`` handling in Py3. + (Issue #443) + + + 1.9 (2014-07-04) ++++++++++++++++ diff --git a/README.rst b/README.rst index 6a81759..fc6bccf 100644 --- a/README.rst +++ b/README.rst @@ -5,6 +5,9 @@ urllib3 .. image:: https://travis-ci.org/shazow/urllib3.png?branch=master :target: https://travis-ci.org/shazow/urllib3 +.. image:: https://www.bountysource.com/badge/tracker?tracker_id=192525 + :target: https://www.bountysource.com/trackers/192525-urllib3?utm_source=192525&utm_medium=shield&utm_campaign=TRACKER_BADGE + Highlights ========== diff --git a/dev-requirements.txt b/dev-requirements.txt index 6de0e09..8010704 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,5 +1,7 @@ nose==1.3.3 mock==1.0.1 -tornado==3.2.2 coverage==3.7.1 tox==1.7.1 + +# Tornado 3.2.2 makes our tests flaky, so we stick with 3.1 +tornado==3.1.1 diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..135c543 --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,130 @@ +# Makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = sphinx-build +PAPER = +BUILDDIR = _build + +# Internal variables. +PAPEROPT_a4 = -D latex_paper_size=a4 +PAPEROPT_letter = -D latex_paper_size=letter +ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
+ +.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest + +help: + @echo "Please use \`make ' where is one of" + @echo " html to make standalone HTML files" + @echo " dirhtml to make HTML files named index.html in directories" + @echo " singlehtml to make a single large HTML file" + @echo " pickle to make pickle files" + @echo " json to make JSON files" + @echo " htmlhelp to make HTML files and a HTML help project" + @echo " qthelp to make HTML files and a qthelp project" + @echo " devhelp to make HTML files and a Devhelp project" + @echo " epub to make an epub" + @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" + @echo " latexpdf to make LaTeX files and run them through pdflatex" + @echo " text to make text files" + @echo " man to make manual pages" + @echo " changes to make an overview of all changed/added/deprecated items" + @echo " linkcheck to check all external links for integrity" + @echo " doctest to run all doctests embedded in the documentation (if enabled)" + +clean: + -rm -rf $(BUILDDIR)/* + +html: + $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." + +dirhtml: + $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." + +singlehtml: + $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml + @echo + @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." + +pickle: + $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle + @echo + @echo "Build finished; now you can process the pickle files." + +json: + $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json + @echo + @echo "Build finished; now you can process the JSON files." 
+ +htmlhelp: + $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp + @echo + @echo "Build finished; now you can run HTML Help Workshop with the" \ + ".hhp project file in $(BUILDDIR)/htmlhelp." + +qthelp: + $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp + @echo + @echo "Build finished; now you can run "qcollectiongenerator" with the" \ + ".qhcp project file in $(BUILDDIR)/qthelp, like this:" + @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/urllib3.qhcp" + @echo "To view the help file:" + @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/urllib3.qhc" + +devhelp: + $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp + @echo + @echo "Build finished." + @echo "To view the help file:" + @echo "# mkdir -p $$HOME/.local/share/devhelp/urllib3" + @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/urllib3" + @echo "# devhelp" + +epub: + $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub + @echo + @echo "Build finished. The epub file is in $(BUILDDIR)/epub." + +latex: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo + @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." + @echo "Run \`make' in that directory to run these through (pdf)latex" \ + "(use \`make latexpdf' here to do that automatically)." + +latexpdf: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through pdflatex..." + $(MAKE) -C $(BUILDDIR)/latex all-pdf + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." + +text: + $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text + @echo + @echo "Build finished. The text files are in $(BUILDDIR)/text." + +man: + $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man + @echo + @echo "Build finished. The manual pages are in $(BUILDDIR)/man." + +changes: + $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes + @echo + @echo "The overview file is in $(BUILDDIR)/changes." 
+ +linkcheck: + $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck + @echo + @echo "Link check complete; look for any errors in the above output " \ + "or in $(BUILDDIR)/linkcheck/output.txt." + +doctest: + $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest + @echo "Testing of doctests in the sources finished, look at the " \ + "results in $(BUILDDIR)/doctest/output.txt." diff --git a/docs/README b/docs/README new file mode 100644 index 0000000..9126c73 --- /dev/null +++ b/docs/README @@ -0,0 +1,14 @@ +# Building the Docs + +First install Sphinx: + + pip install sphinx + +Install pyopenssl and certifi dependencies, to avoid some build errors. (Optional) + + # This step is optional + pip install ndg-httpsclient pyasn1 certifi + +Then build: + + cd docs && make html diff --git a/docs/collections.rst b/docs/collections.rst new file mode 100644 index 0000000..b348140 --- /dev/null +++ b/docs/collections.rst @@ -0,0 +1,13 @@ +Collections +=========== + +These datastructures are used to implement the behaviour of various urllib3 +components in a decoupled and application-agnostic design. + +.. automodule:: urllib3._collections + + .. autoclass:: RecentlyUsedContainer + :members: + + .. autoclass:: HTTPHeaderDict + :members: diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 0000000..7ac8393 --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,232 @@ +# -*- coding: utf-8 -*- +# +# urllib3 documentation build configuration file, created by +# sphinx-quickstart on Wed Oct 5 13:15:40 2011. +# +# This file is execfile()d with the current directory set to its containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. 
+ +from datetime import date +import os +import sys + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. + +root_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) +sys.path.insert(0, root_path) + +import urllib3 + + +# -- General configuration ----------------------------------------------------- + +# If your documentation needs a minimal Sphinx version, state it here. +#needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be extensions +# coming with Sphinx (named 'sphinx.ext.*') or your custom ones. +extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.doctest', + 'sphinx.ext.intersphinx', +] + +# Test code blocks only when explicitly specified +doctest_test_doctest_blocks = '' + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix of source filenames. +source_suffix = '.rst' + +# The encoding of source files. +#source_encoding = 'utf-8-sig' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = u'urllib3' +copyright = u'{year}, Andrey Petrov'.format(year=date.today().year) + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. +version = urllib3.__version__ +# The full version, including alpha/beta/rc tags. +release = version + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +#language = None + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +#today = '' +# Else, today_fmt is used as the format for a strftime call. 
+#today_fmt = '%B %d, %Y' + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +exclude_patterns = ['_build'] + +# The reST default role (used for this markup: `text`) to use for all documents. +#default_role = None + +# If true, '()' will be appended to :func: etc. cross-reference text. +#add_function_parentheses = True + +# If true, the current module name will be prepended to all description +# unit titles (such as .. function::). +#add_module_names = True + +# If true, sectionauthor and moduleauthor directives will be shown in the +# output. They are ignored by default. +#show_authors = False + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + +# A list of ignored prefixes for module index sorting. +#modindex_common_prefix = [] + + +# -- Options for HTML output --------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +html_theme = 'nature' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +#html_theme_options = {} + +# Add any paths that contain custom themes here, relative to this directory. +#html_theme_path = [] + +# The name for this set of Sphinx documents. If None, it defaults to +# " v documentation". +#html_title = None + +# A shorter title for the navigation bar. Default is the same as html_title. +#html_short_title = None + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. +#html_logo = None + +# The name of an image file (within the static path) to use as favicon of the +# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 +# pixels large. 
+#html_favicon = None + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +#html_static_path = ['_static'] + +# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, +# using the given strftime format. +#html_last_updated_fmt = '%b %d, %Y' + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. +#html_use_smartypants = True + +# Custom sidebar templates, maps document names to template names. +#html_sidebars = {} + +# Additional templates that should be rendered to pages, maps page names to +# template names. +#html_additional_pages = {} + +# If false, no module index is generated. +#html_domain_indices = True + +# If false, no index is generated. +#html_use_index = True + +# If true, the index is split into individual pages for each letter. +#html_split_index = False + +# If true, links to the reST sources are added to the pages. +#html_show_sourcelink = True + +# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. +#html_show_sphinx = True + +# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. +#html_show_copyright = True + +# If true, an OpenSearch description file will be output, and all pages will +# contain a tag referring to it. The value of this option must be the +# base URL from which the finished HTML is served. +#html_use_opensearch = '' + +# This is the file name suffix for HTML files (e.g. ".xhtml"). +#html_file_suffix = None + +# Output file base name for HTML help builder. +htmlhelp_basename = 'urllib3doc' + + +# -- Options for LaTeX output -------------------------------------------------- + +# The paper size ('letter' or 'a4'). +#latex_paper_size = 'letter' + +# The font size ('10pt', '11pt' or '12pt'). 
+#latex_font_size = '10pt' + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, author, documentclass [howto/manual]). +latex_documents = [ + ('index', 'urllib3.tex', u'urllib3 Documentation', + u'Andrey Petrov', 'manual'), +] + +# The name of an image file (relative to this directory) to place at the top of +# the title page. +#latex_logo = None + +# For "manual" documents, if this is true, then toplevel headings are parts, +# not chapters. +#latex_use_parts = False + +# If true, show page references after internal links. +#latex_show_pagerefs = False + +# If true, show URL addresses after external links. +#latex_show_urls = False + +# Additional stuff for the LaTeX preamble. +#latex_preamble = '' + +# Documents to append as an appendix to all manuals. +#latex_appendices = [] + +# If false, no module index is generated. +#latex_domain_indices = True + + +# -- Options for manual page output -------------------------------------------- + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + ('index', 'urllib3', u'urllib3 Documentation', + [u'Andrey Petrov'], 1) +] + +intersphinx_mapping = {'python': ('http://docs.python.org/2.7', None)} diff --git a/docs/contrib.rst b/docs/contrib.rst new file mode 100644 index 0000000..99c5492 --- /dev/null +++ b/docs/contrib.rst @@ -0,0 +1,14 @@ +.. _contrib-modules: + +Contrib Modules +=============== + +These modules implement various extra features, that may not be ready for +prime time. + +.. _pyopenssl: + +SNI-support for Python 2 +------------------------ + +.. 
automodule:: urllib3.contrib.pyopenssl diff --git a/docs/doc-requirements.txt b/docs/doc-requirements.txt new file mode 100644 index 0000000..b7b6d66 --- /dev/null +++ b/docs/doc-requirements.txt @@ -0,0 +1,12 @@ +ndg-httpsclient==0.3.2 +pyasn1==0.1.7 +Sphinx==1.2.2 +Jinja2==2.7.3 +MarkupSafe==0.23 +Pygments==1.6 +cryptography==0.4 +six==1.7.2 +cffi==0.8.2 +docutils==0.11 +pycparser==2.10 +certifi==14.05.14 \ No newline at end of file diff --git a/docs/exceptions.rst b/docs/exceptions.rst new file mode 100644 index 0000000..f9e0553 --- /dev/null +++ b/docs/exceptions.rst @@ -0,0 +1,7 @@ +Exceptions +========== + +Custom exceptions defined by urllib3 + +.. automodule:: urllib3.exceptions + :members: diff --git a/docs/helpers.rst b/docs/helpers.rst new file mode 100644 index 0000000..79f268b --- /dev/null +++ b/docs/helpers.rst @@ -0,0 +1,55 @@ +Helpers +======= + +Useful methods for working with :mod:`httplib`, completely decoupled from +code specific to **urllib3**. + + +Timeouts +-------- + +.. automodule:: urllib3.util.timeout + :members: + +Retries +------- + +.. automodule:: urllib3.util.retry + :members: + +URL Helpers +----------- + +.. automodule:: urllib3.util.url + :members: + +Filepost +-------- + +.. automodule:: urllib3.filepost + :members: + +.. automodule:: urllib3.fields + :members: + +Request +------- + +.. automodule:: urllib3.request + :members: + +.. automodule:: urllib3.util.request + :members: + +Response +-------- + +.. automodule:: urllib3.response + :members: + :undoc-members: + +SSL/TLS Helpers +--------------- + +.. automodule:: urllib3.util.ssl_ + :members: diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 0000000..1fc8a9c --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,340 @@ +===================== +urllib3 Documentation +===================== + +.. 
toctree:: + :hidden: + + pools + managers + security + helpers + collections + contrib + + +Highlights +========== + +- Re-use the same socket connection for multiple requests, with optional + client-side certificate verification. See: + :class:`~urllib3.connectionpool.HTTPConnectionPool` and + :class:`~urllib3.connectionpool.HTTPSConnectionPool` + +- File posting. See: + :func:`~urllib3.filepost.encode_multipart_formdata` + +- Built-in redirection and retries (optional). + +- Supports gzip and deflate decoding. See: + :func:`~urllib3.response.decode_gzip` and + :func:`~urllib3.response.decode_deflate` + +- Thread-safe and sanity-safe. + +- Tested on Python 2.6+ and Python 3.2+, 100% unit test coverage. + +- Works with AppEngine, gevent, eventlib, and the standard library :mod:`io` module. + +- Small and easy to understand codebase perfect for extending and building upon. + For a more comprehensive solution, have a look at + `Requests `_ which is also powered by urllib3. + + +Getting Started +=============== + +Installing +---------- + +``pip install urllib3`` or fetch the latest source from +`github.com/shazow/urllib3 `_. + +Usage +----- + +.. doctest :: + + >>> import urllib3 + >>> http = urllib3.PoolManager() + >>> r = http.request('GET', 'http://example.com/') + >>> r.status + 200 + >>> r.headers['server'] + 'ECS (iad/182A)' + >>> 'data: ' + r.data + 'data: ...' + + +**By default, urllib3 does not verify your HTTPS requests**. +You'll need to supply a root certificate bundle, or use `certifi +`_ + +.. doctest :: + + >>> import urllib3, certifi + >>> http = urllib3.PoolManager(cert_reqs='CERT_REQUIRED', ca_certs=certifi.where()) + >>> r = http.request('GET', 'https://insecure.com/') + Traceback (most recent call last): + ... + SSLError: hostname 'insecure.com' doesn't match 'svn.nmap.org' + +For more on making secure SSL/TLS HTTPS requests, read the :ref:`Security +section <security>`.
+ + +urllib3's responses respect the :mod:`io` framework from Python's +standard library, allowing use of these standard objects for purposes +like buffering: + +.. doctest :: + + >>> http = urllib3.PoolManager() + >>> r = http.urlopen('GET','http://example.com/', preload_content=False) + >>> b = io.BufferedReader(r, 2048) + >>> firstpart = b.read(100) + >>> # ... your internet connection fails momentarily ... + >>> secondpart = b.read() + + +Components +========== + +:mod:`urllib3` tries to strike a fine balance between power, extendability, and +sanity. To achieve this, the codebase is a collection of small reusable +utilities and abstractions composed together in a few helpful layers. + + +PoolManager +----------- + +The highest level is the :doc:`PoolManager(...) <managers>`. + +The :class:`~urllib3.poolmanager.PoolManager` will take care of reusing +connections for you whenever you request the same host. This should cover most +scenarios without significant loss of efficiency, but you can always drop down +to a lower level component for more granular control. + +.. doctest :: + + >>> import urllib3 + >>> http = urllib3.PoolManager(10) + >>> r1 = http.request('GET', 'http://example.com/') + >>> r2 = http.request('GET', 'http://httpbin.org/') + >>> r3 = http.request('GET', 'http://httpbin.org/get') + >>> len(http.pools) + 2 + +A :class:`~urllib3.poolmanager.PoolManager` is a proxy for a collection of +:class:`ConnectionPool` objects. They both inherit from +:class:`~urllib3.request.RequestMethods` to make sure that their API is +similar, so that instances of either can be passed around interchangeably. + + +ProxyManager +------------ + +The :class:`~urllib3.poolmanager.ProxyManager` is an HTTP proxy-aware +subclass of :class:`~urllib3.poolmanager.PoolManager`.
It produces a single +:class:`~urllib3.connectionpool.HTTPConnectionPool` instance for all HTTP +connections and individual per-server:port +:class:`~urllib3.connectionpool.HTTPSConnectionPool` instances for tunnelled +HTTPS connections: + +:: + + >>> proxy = urllib3.ProxyManager('http://localhost:3128/') + >>> r1 = proxy.request('GET', 'http://google.com/') + >>> r2 = proxy.request('GET', 'http://httpbin.org/') + >>> len(proxy.pools) + 1 + >>> r3 = proxy.request('GET', 'https://httpbin.org/') + >>> r4 = proxy.request('GET', 'https://twitter.com/') + >>> len(proxy.pools) + 3 + + +ConnectionPool +-------------- + +The next layer is the :doc:`ConnectionPool(...) <pools>`. + +The :class:`~urllib3.connectionpool.HTTPConnectionPool` and +:class:`~urllib3.connectionpool.HTTPSConnectionPool` classes allow you to +define a pool of connections to a single host and make requests against this +pool with automatic **connection reusing** and **thread safety**. + +When the :mod:`ssl` module is available, then +:class:`~urllib3.connectionpool.HTTPSConnectionPool` objects can be configured +to check SSL certificates against specific provided certificate authorities. + +.. doctest :: + + >>> import urllib3 + >>> conn = urllib3.connection_from_url('http://httpbin.org/') + >>> r1 = conn.request('GET', 'http://httpbin.org/') + >>> r2 = conn.request('GET', '/user-agent') + >>> r3 = conn.request('GET', 'http://example.com') + Traceback (most recent call last): + ... + urllib3.exceptions.HostChangedError: HTTPConnectionPool(host='httpbin.org', port=None): Tried to open a foreign host with url: http://example.com + +Again, a ConnectionPool is a pool of connections to a specific host. Trying to +access a different host through the same pool will raise a ``HostChangedError`` +exception unless you specify ``assert_same_host=False``. Do this at your own +risk as the outcome is completely dependent on the behaviour of the host server.
+ +If you need to access multiple hosts and don't want to manage your own +collection of :class:`~urllib3.connectionpool.ConnectionPool` objects, then you +should use a :class:`~urllib3.poolmanager.PoolManager`. + +A :class:`~urllib3.connectionpool.ConnectionPool` is composed of a collection +of :class:`httplib.HTTPConnection` objects. + + +Timeout +------- + +A timeout can be set to abort socket operations on individual connections after +the specified duration. The timeout can be defined as a float or an instance of +:class:`~urllib3.util.timeout.Timeout` which gives more granular configuration +over how much time is allowed for different stages of the request. This can be +set for the entire pool or per-request. + +.. doctest :: + + >>> from urllib3 import PoolManager, Timeout + + >>> # Manager with 3 seconds combined timeout. + >>> http = PoolManager(timeout=3.0) + >>> r = http.request('GET', 'http://httpbin.org/delay/1') + + >>> # Manager with 2 second timeout for the read phase, no limit for the rest. + >>> http = PoolManager(timeout=Timeout(read=2.0)) + >>> r = http.request('GET', 'http://httpbin.org/delay/1') + + >>> # Manager with no timeout but a request with a timeout of 1 seconds for + >>> # the connect phase and 2 seconds for the read phase. + >>> http = PoolManager() + >>> r = http.request('GET', 'http://httpbin.org/delay/1', timeout=Timeout(connect=1.0, read=2.0)) + + >>> # Same Manager but request with a 5 second total timeout. + >>> r = http.request('GET', 'http://httpbin.org/delay/1', timeout=Timeout(total=5.0)) + +See the :class:`~urllib3.util.timeout.Timeout` definition for more details. + + +Retry +----- + +Retries can be configured by passing an instance of +:class:`~urllib3.util.retry.Retry`, or disabled by passing ``False``, to the +``retries`` parameter. + +Redirects are also considered to be a subset of retries but can be configured or +disabled individually. 
+ +:: + + >>> from urllib3 import PoolManager, Retry + + >>> # Allow 3 retries total for all requests in this pool. These are the same: + >>> http = PoolManager(retries=3) + >>> http = PoolManager(retries=Retry(3)) + >>> http = PoolManager(retries=Retry(total=3)) + + >>> r = http.request('GET', 'http://httpbin.org/redirect/2') + >>> # r.status -> 200 + + >>> # Disable redirects for this request. + >>> r = http.request('GET', 'http://httpbin.org/redirect/2', retries=Retry(3, redirect=False)) + >>> # r.status -> 302 + + >>> # No total limit, but only do 5 connect retries, for this request. + >>> r = http.request('GET', 'http://httpbin.org/', retries=Retry(connect=5)) + + +See the :class:`~urllib3.util.retry.Retry` definition for more details. + + +Foundation +---------- + +At the very core, just like its predecessors, :mod:`urllib3` is built on top of +:mod:`httplib` -- the lowest level HTTP library included in the Python +standard library. + +To aid the limited functionality of the :mod:`httplib` module, :mod:`urllib3` +provides various helper methods which are used with the higher level components +but can also be used independently. + +.. toctree:: + + helpers + exceptions + + +Contrib Modules +--------------- + +These modules implement various extra features, that may not be ready for +prime time. + +.. toctree:: + + contrib + + +Contributing +============ + +#. `Check for open issues `_ or open + a fresh issue to start a discussion around a feature idea or a bug. There is + a *Contributor Friendly* tag for issues that should be ideal for people who + are not very familiar with the codebase yet. +#. Fork the `urllib3 repository on Github `_ + to start making your changes. +#. Write a test which shows that the bug was fixed or that the feature works + as expected. +#. Send a pull request and bug the maintainer until it gets merged and published. + :) Make sure to add yourself to ``CONTRIBUTORS.txt``. 
+ + +Sponsorship +=========== + +Please consider sponsoring urllib3 development, especially if your company +benefits from this library. + +* **Project Grant**: A grant for contiguous full-time development has the + biggest impact for progress. Periods of 3 to 10 days allow a contributor to + tackle substantial complex issues which are otherwise left to linger until + somebody can't afford to not fix them. + + Contact `@shazow `_ to arrange a grant for a core + contributor. + +* **One-off**: Development will continue regardless of funding, but donations help move + things further along quicker as the maintainer can allocate more time off to + work on urllib3 specifically. + + .. raw:: html + + Sponsor with Credit Card + + Sponsor with Bitcoin + +* **Recurring**: You're welcome to `support the maintainer on Gittip + `_. + + +Recent Sponsors +--------------- + +Huge thanks to all the companies and individuals who financially contributed to +the development of urllib3. Please send a PR if you've donated and would like +to be listed. + +* `Stripe `_ (June 23, 2014) + +.. * [Company] ([optional tagline]), [optional description of grant] ([date]) diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 0000000..41aa35b --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,170 @@ +@ECHO OFF + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set BUILDDIR=_build +set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . +if NOT "%PAPER%" == "" ( + set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% +) + +if "%1" == "" goto help + +if "%1" == "help" ( + :help + echo.Please use `make ^<target^>` where ^<target^> is one of + echo. html to make standalone HTML files + echo. dirhtml to make HTML files named index.html in directories + echo. singlehtml to make a single large HTML file + echo. pickle to make pickle files + echo. json to make JSON files + echo. htmlhelp to make HTML files and a HTML help project + echo.
qthelp to make HTML files and a qthelp project + echo. devhelp to make HTML files and a Devhelp project + echo. epub to make an epub + echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter + echo. text to make text files + echo. man to make manual pages + echo. changes to make an overview over all changed/added/deprecated items + echo. linkcheck to check all external links for integrity + echo. doctest to run all doctests embedded in the documentation if enabled + goto end +) + +if "%1" == "clean" ( + for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i + del /q /s %BUILDDIR%\* + goto end +) + +if "%1" == "html" ( + %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The HTML pages are in %BUILDDIR%/html. + goto end +) + +if "%1" == "dirhtml" ( + %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. + goto end +) + +if "%1" == "singlehtml" ( + %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. + goto end +) + +if "%1" == "pickle" ( + %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can process the pickle files. + goto end +) + +if "%1" == "json" ( + %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can process the JSON files. + goto end +) + +if "%1" == "htmlhelp" ( + %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can run HTML Help Workshop with the ^ +.hhp project file in %BUILDDIR%/htmlhelp. + goto end +) + +if "%1" == "qthelp" ( + %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp + if errorlevel 1 exit /b 1 + echo. 
+ echo.Build finished; now you can run "qcollectiongenerator" with the ^ +.qhcp project file in %BUILDDIR%/qthelp, like this: + echo.^> qcollectiongenerator %BUILDDIR%\qthelp\urllib3.qhcp + echo.To view the help file: + echo.^> assistant -collectionFile %BUILDDIR%\qthelp\urllib3.ghc + goto end +) + +if "%1" == "devhelp" ( + %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. + goto end +) + +if "%1" == "epub" ( + %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The epub file is in %BUILDDIR%/epub. + goto end +) + +if "%1" == "latex" ( + %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. + goto end +) + +if "%1" == "text" ( + %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The text files are in %BUILDDIR%/text. + goto end +) + +if "%1" == "man" ( + %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The manual pages are in %BUILDDIR%/man. + goto end +) + +if "%1" == "changes" ( + %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes + if errorlevel 1 exit /b 1 + echo. + echo.The overview file is in %BUILDDIR%/changes. + goto end +) + +if "%1" == "linkcheck" ( + %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck + if errorlevel 1 exit /b 1 + echo. + echo.Link check complete; look for any errors in the above output ^ +or in %BUILDDIR%/linkcheck/output.txt. + goto end +) + +if "%1" == "doctest" ( + %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest + if errorlevel 1 exit /b 1 + echo. + echo.Testing of doctests in the sources finished, look at the ^ +results in %BUILDDIR%/doctest/output.txt. 
+ goto end +) + +:end diff --git a/docs/managers.rst b/docs/managers.rst new file mode 100644 index 0000000..f9cab03 --- /dev/null +++ b/docs/managers.rst @@ -0,0 +1,62 @@ +PoolManager +=========== + +.. automodule:: urllib3.poolmanager + +A pool manager is an abstraction for a collection of +:doc:`ConnectionPools <pools>`. + +If you need to make requests to multiple hosts, then you can use a +:class:`.PoolManager`, which takes care of maintaining your pools +so you don't have to. + +.. doctest :: + + >>> from urllib3 import PoolManager + >>> manager = PoolManager(10) + >>> r = manager.request('GET', 'http://example.com') + >>> r.headers['server'] + 'ECS (iad/182A)' + >>> r = manager.request('GET', 'http://httpbin.org/') + >>> r.headers['server'] + 'gunicorn/18.0' + >>> r = manager.request('POST', 'http://httpbin.org/headers') + >>> r = manager.request('HEAD', 'http://httpbin.org/cookies') + >>> len(manager.pools) + 2 + >>> conn = manager.connection_from_host('httpbin.org') + >>> conn.num_requests + 3 + +The API of a :class:`.PoolManager` object is similar to that of a +:doc:`ConnectionPool <pools>`, so they can be passed around interchangeably. + +The PoolManager uses a Least Recently Used (LRU) policy for discarding old +pools. That is, if you set the PoolManager ``num_pools`` to 10, then after +making requests to 11 or more different hosts, the least recently used pools +will be cleaned up eventually. + +Cleanup of stale pools does not happen immediately. You can read more about the +implementation and the various adjustable variables within +:class:`~urllib3._collections.RecentlyUsedContainer`. + +API +--- + + .. autoclass:: PoolManager + :inherited-members: + +ProxyManager +============ + +:class:`.ProxyManager` is an HTTP proxy-aware subclass of :class:`.PoolManager`.
+It produces a single +:class:`~urllib3.connectionpool.HTTPConnectionPool` instance for all HTTP +connections and individual per-server:port +:class:`~urllib3.connectionpool.HTTPSConnectionPool` instances for tunnelled +HTTPS connections. + +API +--- + .. autoclass:: ProxyManager + diff --git a/docs/pools.rst b/docs/pools.rst new file mode 100644 index 0000000..63cb7d1 --- /dev/null +++ b/docs/pools.rst @@ -0,0 +1,71 @@ +ConnectionPools +=============== + +.. automodule:: urllib3.connectionpool + +A connection pool is a container for a collection of connections to a specific +host. + +If you need to make requests to the same host repeatedly, then you should use a +:class:`.HTTPConnectionPool`. + +.. doctest :: + + >>> from urllib3 import HTTPConnectionPool + >>> pool = HTTPConnectionPool('ajax.googleapis.com', maxsize=1) + >>> r = pool.request('GET', '/ajax/services/search/web', + ... fields={'q': 'urllib3', 'v': '1.0'}) + >>> r.status + 200 + >>> r.headers['content-type'] + 'text/javascript; charset=utf-8' + >>> 'data: ' + r.data # Content of the response + 'data: ...' + >>> r = pool.request('GET', '/ajax/services/search/web', + ... fields={'q': 'python', 'v': '1.0'}) + >>> 'data: ' + r.data # Content of the response + 'data: ...' + >>> pool.num_connections + 1 + >>> pool.num_requests + 2 + +By default, the pool will cache just one connection. If you're planning on using +such a pool in a multithreaded environment, you should set the ``maxsize`` of +the pool to a higher number, such as the number of threads. You can also control +many other variables like timeout, blocking, and default headers. + +Helpers +------- + +There are various helper functions provided for instantiating these +ConnectionPools more easily: + + .. autofunction:: connection_from_url + +API +--- + +:mod:`urllib3.connectionpool` comes with two connection pools: + + .. autoclass:: HTTPConnectionPool + :members: + :inherited-members: + + .. 
autoclass:: HTTPSConnectionPool + + +All of these pools inherit from a common base class: + + .. autoclass:: ConnectionPool + +.. module:: urllib3.connection + +Related Classes +--------------- + +urllib3 implements its own :class:`HTTPConnection` object to allow for more +flexibility than the standard library's implementation. + +.. autoclass:: HTTPConnection + :members: diff --git a/docs/security.rst b/docs/security.rst new file mode 100644 index 0000000..5321e24 --- /dev/null +++ b/docs/security.rst @@ -0,0 +1,161 @@ +.. _security: + +Security: Verified HTTPS with SSL/TLS +===================================== + +Very important fact: **By default, urllib3 does not verify HTTPS requests.** + +The historic reason for this is that we rely on ``httplib`` for some of the +HTTP protocol implementation, and ``httplib`` does not verify requests out of +the box. This is not a good reason, but here we are. + +Luckily, it's not too hard to enable verified HTTPS requests and there are a +few ways to do it. + + +Python with SSL enabled +----------------------- + +First we need to make sure your Python installation has SSL enabled. Easiest +way to check is to simply open a Python shell and type `import ssl`:: + + >>> import ssl + Traceback (most recent call last): + ... + ImportError: No module named _ssl + +If you got an ``ImportError``, then your Python is not compiled with SSL support +and you'll need to re-install it. Read +`this StackOverflow thread `_ +for details. + +Otherwise, if ``ssl`` imported cleanly, then we're ready to setup our certificates: +:ref:`certifi-with-urllib3`. + + +Enabling SSL on Google AppEngine +++++++++++++++++++++++++++++++++ + +If you're using Google App Engine, you'll need to add ``ssl`` as a library +dependency to your yaml file, like this:: + + libraries: + - name: ssl + version: latest + +If it's still not working, you may need to enable billing on your account +to `enable using sockets +`_. + + +.. 
_certifi-with-urllib3: + +Using Certifi with urllib3 +-------------------------- + +`Certifi `_ is a package which ships with Mozilla's root +certificates for easy programmatic access. + +1. Install the Python ``certifi`` package:: + + $ pip install certifi + +2. Setup your pool to require a certificate and provide the certifi bundle:: + + import urllib3 + import certifi + + http = urllib3.PoolManager( + cert_reqs='CERT_REQUIRED', # Force certificate check. + ca_certs=certifi.where(), # Path to the Certifi bundle. + ) + + # You're ready to make verified HTTPS requests. + try: + r = http.request('GET', 'https://example.com/') + except urllib3.exceptions.SSLError as e: + # Handle incorrect certificate error. + ... + +Make sure to update your ``certifi`` package regularly to get the latest root +certificates. + + +Using your system's root certificates +------------------------------------- + +Your system's root certificates may be more up-to-date than maintaining your +own, but the trick is finding where they live. Different operating systems have +them in different places. + +For example, on most Linux distributions they're at +``/etc/ssl/certs/ca-certificates.crt``. On Windows and OS X? `It's not so simple +`_. + +Once you find your root certificate file:: + + import urllib3 + + ca_certs = "/etc/ssl/certs/ca-certificates.crt" # Or wherever it lives. + + http = urllib3.PoolManager( + cert_reqs='CERT_REQUIRED', # Force certificate check. + ca_certs=ca_certs, # Path to your certificate bundle. + ) + + # You're ready to make verified HTTPS requests. + try: + r = http.request('GET', 'https://example.com/') + except urllib3.exceptions.SSLError as e: + # Handle incorrect certificate error. + ... + + +OpenSSL / PyOpenSSL +------------------- + +By default, we use the standard library's ``ssl`` module. Unfortunately, there +are several limitations which are addressed by PyOpenSSL: + +- (Python 2.x) SNI support. 
+- (Python 2.x-3.2) Disabling compression to mitigate `CRIME attack + `_. + +To use the Python OpenSSL bindings instead, you'll need to install the required +packages:: + + $ pip install pyopenssl ndg-httpsclient pyasn1 + +Once the packages are installed, you can tell urllib3 to switch the ssl backend +to PyOpenSSL with :func:`~urllib3.contrib.pyopenssl.inject_into_urllib3`:: + + import urllib3.contrib.pyopenssl + urllib3.contrib.pyopenssl.inject_into_urllib3() + +Now you can continue using urllib3 as you normally would. + +For more details, check the :mod:`~urllib3.contrib.pyopenssl` module. + + +InsecureRequestWarning +---------------------- + +.. versionadded:: 1.9 + +Unverified HTTPS requests will trigger a warning:: + + urllib3/connectionpool.py:736: InsecureRequestWarning: Unverified HTTPS + request is being made. Adding certificate verification is strongly advised. + See: https://urllib3.readthedocs.org/en/latest/security.html + (This warning will only appear once by default.) + +This would be a great time to enable HTTPS verification: +:ref:`certifi-with-urllib3`. + +If you know what you're doing and would like to disable this and other warnings, +you can use :func:`~urllib3.disable_warnings`:: + + import urllib3 + urllib3.disable_warnings() + +Making unverified HTTPS requests is strongly discouraged. 
˙ ͜ʟ˙ diff --git a/dummyserver/__init__.pyc b/dummyserver/__init__.pyc new file mode 100644 index 0000000..24e9f56 Binary files /dev/null and b/dummyserver/__init__.pyc differ diff --git a/dummyserver/handlers.pyc b/dummyserver/handlers.pyc new file mode 100644 index 0000000..22aedc3 Binary files /dev/null and b/dummyserver/handlers.pyc differ diff --git a/dummyserver/proxy.pyc b/dummyserver/proxy.pyc new file mode 100644 index 0000000..23fa01d Binary files /dev/null and b/dummyserver/proxy.pyc differ diff --git a/dummyserver/server.pyc b/dummyserver/server.pyc new file mode 100644 index 0000000..b997d0e Binary files /dev/null and b/dummyserver/server.pyc differ diff --git a/dummyserver/testcase.pyc b/dummyserver/testcase.pyc new file mode 100644 index 0000000..29cc06a Binary files /dev/null and b/dummyserver/testcase.pyc differ diff --git a/test/__init__.py b/test/__init__.py new file mode 100644 index 0000000..d56a4d3 --- /dev/null +++ b/test/__init__.py @@ -0,0 +1,92 @@ +import warnings +import sys +import errno +import functools +import socket + +from nose.plugins.skip import SkipTest + +from urllib3.exceptions import MaxRetryError, HTTPWarning +from urllib3.packages import six + +# We need a host that will not immediately close the connection with a TCP +# Reset. SO suggests this hostname +TARPIT_HOST = '10.255.255.1' + +VALID_SOURCE_ADDRESSES = [('::1', 0), ('127.0.0.1', 0)] +# RFC 5737: 192.0.2.0/24 is for testing only. +# RFC 3849: 2001:db8::/32 is for documentation only. 
+INVALID_SOURCE_ADDRESSES = [('192.0.2.255', 0), ('2001:db8::1', 0)] + + +def clear_warnings(cls=HTTPWarning): + new_filters = [] + for f in warnings.filters: + if issubclass(f[2], cls): + continue + new_filters.append(f) + warnings.filters[:] = new_filters + +def setUp(): + clear_warnings() + warnings.simplefilter('ignore', HTTPWarning) + + +def onlyPy26OrOlder(test): + """Skips this test unless you are on Python2.6.x or earlier.""" + + @functools.wraps(test) + def wrapper(*args, **kwargs): + msg = "{name} only runs on Python2.6.x or older".format(name=test.__name__) + if sys.version_info >= (2, 7): + raise SkipTest(msg) + return test(*args, **kwargs) + return wrapper + +def onlyPy27OrNewer(test): + """Skips this test unless you are on Python 2.7.x or later.""" + + @functools.wraps(test) + def wrapper(*args, **kwargs): + msg = "{name} requires Python 2.7.x+ to run".format(name=test.__name__) + if sys.version_info < (2, 7): + raise SkipTest(msg) + return test(*args, **kwargs) + return wrapper + +def onlyPy3(test): + """Skips this test unless you are on Python3.x""" + + @functools.wraps(test) + def wrapper(*args, **kwargs): + msg = "{name} requires Python3.x to run".format(name=test.__name__) + if not six.PY3: + raise SkipTest(msg) + return test(*args, **kwargs) + return wrapper + +def requires_network(test): + """Helps you skip tests that require the network""" + + def _is_unreachable_err(err): + return getattr(err, 'errno', None) in (errno.ENETUNREACH, + errno.EHOSTUNREACH) # For OSX + + @functools.wraps(test) + def wrapper(*args, **kwargs): + msg = "Can't run {name} because the network is unreachable".format( + name=test.__name__) + try: + return test(*args, **kwargs) + except socket.error as e: + # This test needs an initial network connection to attempt the + # connection to the TARPIT_HOST. This fails if you are in a place + # without an Internet connection, so we skip the test in that case. 
+ if _is_unreachable_err(e): + raise SkipTest(msg) + raise + except MaxRetryError as e: + if _is_unreachable_err(e.reason): + raise SkipTest(msg) + raise + return wrapper diff --git a/test/__init__.pyc b/test/__init__.pyc new file mode 100644 index 0000000..38b9317 Binary files /dev/null and b/test/__init__.pyc differ diff --git a/test/benchmark.py b/test/benchmark.py new file mode 100644 index 0000000..242e72f --- /dev/null +++ b/test/benchmark.py @@ -0,0 +1,77 @@ +#!/usr/bin/env python + +""" +Really simple rudimentary benchmark to compare ConnectionPool versus standard +urllib to demonstrate the usefulness of connection re-using. +""" +from __future__ import print_function + +import sys +import time +import urllib + +sys.path.append('../') +import urllib3 + + +# URLs to download. Doesn't matter as long as they're from the same host, so we +# can take advantage of connection re-using. +TO_DOWNLOAD = [ + 'http://code.google.com/apis/apps/', + 'http://code.google.com/apis/base/', + 'http://code.google.com/apis/blogger/', + 'http://code.google.com/apis/calendar/', + 'http://code.google.com/apis/codesearch/', + 'http://code.google.com/apis/contact/', + 'http://code.google.com/apis/books/', + 'http://code.google.com/apis/documents/', + 'http://code.google.com/apis/finance/', + 'http://code.google.com/apis/health/', + 'http://code.google.com/apis/notebook/', + 'http://code.google.com/apis/picasaweb/', + 'http://code.google.com/apis/spreadsheets/', + 'http://code.google.com/apis/webmastertools/', + 'http://code.google.com/apis/youtube/', +] + + +def urllib_get(url_list): + assert url_list + for url in url_list: + now = time.time() + r = urllib.urlopen(url) + elapsed = time.time() - now + print("Got in %0.3f: %s" % (elapsed, url)) + + +def pool_get(url_list): + assert url_list + pool = urllib3.PoolManager() + for url in url_list: + now = time.time() + r = pool.request('GET', url, assert_same_host=False) + elapsed = time.time() - now + print("Got in %0.3fs: %s" % 
(elapsed, url)) + + +if __name__ == '__main__': + print("Running pool_get ...") + now = time.time() + pool_get(TO_DOWNLOAD) + pool_elapsed = time.time() - now + + print("Running urllib_get ...") + now = time.time() + urllib_get(TO_DOWNLOAD) + urllib_elapsed = time.time() - now + + print("Completed pool_get in %0.3fs" % pool_elapsed) + print("Completed urllib_get in %0.3fs" % urllib_elapsed) + + +""" +Example results: + +Completed pool_get in 1.163s +Completed urllib_get in 2.318s +""" diff --git a/test/contrib/__init__.py b/test/contrib/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test/contrib/__init__.pyc b/test/contrib/__init__.pyc new file mode 100644 index 0000000..2d2fd5d Binary files /dev/null and b/test/contrib/__init__.pyc differ diff --git a/test/contrib/test_pyopenssl.py b/test/contrib/test_pyopenssl.py new file mode 100644 index 0000000..5d57527 --- /dev/null +++ b/test/contrib/test_pyopenssl.py @@ -0,0 +1,23 @@ +from nose.plugins.skip import SkipTest +from urllib3.packages import six + +if six.PY3: + raise SkipTest('Testing of PyOpenSSL disabled on PY3') + +try: + from urllib3.contrib.pyopenssl import (inject_into_urllib3, + extract_from_urllib3) +except ImportError as e: + raise SkipTest('Could not import PyOpenSSL: %r' % e) + + +from ..with_dummyserver.test_https import TestHTTPS, TestHTTPS_TLSv1 +from ..with_dummyserver.test_socketlevel import TestSNI, TestSocketClosing + + +def setup_module(): + inject_into_urllib3() + + +def teardown_module(): + extract_from_urllib3() diff --git a/test/contrib/test_pyopenssl.pyc b/test/contrib/test_pyopenssl.pyc new file mode 100644 index 0000000..6441273 Binary files /dev/null and b/test/contrib/test_pyopenssl.pyc differ diff --git a/test/port_helpers.py b/test/port_helpers.py new file mode 100644 index 0000000..e818a9b --- /dev/null +++ b/test/port_helpers.py @@ -0,0 +1,100 @@ +# These helpers are copied from test_support.py in the Python 2.7 standard +# library test suite. 
+ +import socket + + +# Don't use "localhost", since resolving it uses the DNS under recent +# Windows versions (see issue #18792). +HOST = "127.0.0.1" +HOSTv6 = "::1" + +def find_unused_port(family=socket.AF_INET, socktype=socket.SOCK_STREAM): + """Returns an unused port that should be suitable for binding. This is + achieved by creating a temporary socket with the same family and type as + the 'sock' parameter (default is AF_INET, SOCK_STREAM), and binding it to + the specified host address (defaults to 0.0.0.0) with the port set to 0, + eliciting an unused ephemeral port from the OS. The temporary socket is + then closed and deleted, and the ephemeral port is returned. + + Either this method or bind_port() should be used for any tests where a + server socket needs to be bound to a particular port for the duration of + the test. Which one to use depends on whether the calling code is creating + a python socket, or if an unused port needs to be provided in a constructor + or passed to an external program (i.e. the -accept argument to openssl's + s_server mode). Always prefer bind_port() over find_unused_port() where + possible. Hard coded ports should *NEVER* be used. As soon as a server + socket is bound to a hard coded port, the ability to run multiple instances + of the test simultaneously on the same host is compromised, which makes the + test a ticking time bomb in a buildbot environment. On Unix buildbots, this + may simply manifest as a failed test, which can be recovered from without + intervention in most cases, but on Windows, the entire python process can + completely and utterly wedge, requiring someone to log in to the buildbot + and manually kill the affected process. + + (This is easy to reproduce on Windows, unfortunately, and can be traced to + the SO_REUSEADDR socket option having different semantics on Windows versus + Unix/Linux. 
On Unix, you can't have two AF_INET SOCK_STREAM sockets bind, + listen and then accept connections on identical host/ports. An EADDRINUSE + socket.error will be raised at some point (depending on the platform and + the order bind and listen were called on each socket). + + However, on Windows, if SO_REUSEADDR is set on the sockets, no EADDRINUSE + will ever be raised when attempting to bind two identical host/ports. When + accept() is called on each socket, the second caller's process will steal + the port from the first caller, leaving them both in an awkwardly wedged + state where they'll no longer respond to any signals or graceful kills, and + must be forcibly killed via OpenProcess()/TerminateProcess(). + + The solution on Windows is to use the SO_EXCLUSIVEADDRUSE socket option + instead of SO_REUSEADDR, which effectively affords the same semantics as + SO_REUSEADDR on Unix. Given the propensity of Unix developers in the Open + Source world compared to Windows ones, this is a common mistake. A quick + look over OpenSSL's 0.9.8g source shows that they use SO_REUSEADDR when + openssl.exe is called with the 's_server' option, for example. See + http://bugs.python.org/issue2550 for more info. The following site also + has a very thorough description about the implications of both REUSEADDR + and EXCLUSIVEADDRUSE on Windows: + http://msdn2.microsoft.com/en-us/library/ms740621(VS.85).aspx) + + XXX: although this approach is a vast improvement on previous attempts to + elicit unused ports, it rests heavily on the assumption that the ephemeral + port returned to us by the OS won't immediately be dished back out to some + other process when we close and delete our temporary socket but before our + calling code has a chance to bind the returned port. 
We can deal with this + issue if/when we come across it.""" + tempsock = socket.socket(family, socktype) + port = bind_port(tempsock) + tempsock.close() + del tempsock + return port + +def bind_port(sock, host=HOST): + """Bind the socket to a free port and return the port number. Relies on + ephemeral ports in order to ensure we are using an unbound port. This is + important as many tests may be running simultaneously, especially in a + buildbot environment. This method raises an exception if the sock.family + is AF_INET and sock.type is SOCK_STREAM, *and* the socket has SO_REUSEADDR + or SO_REUSEPORT set on it. Tests should *never* set these socket options + for TCP/IP sockets. The only case for setting these options is testing + multicasting via multiple UDP sockets. + + Additionally, if the SO_EXCLUSIVEADDRUSE socket option is available (i.e. + on Windows), it will be set on the socket. This will prevent anyone else + from bind()'ing to our host/port for the duration of the test. + """ + if sock.family == socket.AF_INET and sock.type == socket.SOCK_STREAM: + if hasattr(socket, 'SO_REUSEADDR'): + if sock.getsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR) == 1: + raise ValueError("tests should never set the SO_REUSEADDR " \ + "socket option on TCP/IP sockets!") + if hasattr(socket, 'SO_REUSEPORT'): + if sock.getsockopt(socket.SOL_SOCKET, socket.SO_REUSEPORT) == 1: + raise ValueError("tests should never set the SO_REUSEPORT " \ + "socket option on TCP/IP sockets!") + if hasattr(socket, 'SO_EXCLUSIVEADDRUSE'): + sock.setsockopt(socket.SOL_SOCKET, socket.SO_EXCLUSIVEADDRUSE, 1) + + sock.bind((host, 0)) + port = sock.getsockname()[1] + return port diff --git a/test/port_helpers.pyc b/test/port_helpers.pyc new file mode 100644 index 0000000..7a1c425 Binary files /dev/null and b/test/port_helpers.pyc differ diff --git a/test/test_collections.pyc b/test/test_collections.pyc new file mode 100644 index 0000000..d1ecd73 Binary files /dev/null and b/test/test_collections.pyc 
differ diff --git a/test/test_compatibility.pyc b/test/test_compatibility.pyc new file mode 100644 index 0000000..2dfdf75 Binary files /dev/null and b/test/test_compatibility.pyc differ diff --git a/test/test_connectionpool.pyc b/test/test_connectionpool.pyc new file mode 100644 index 0000000..e87a3b3 Binary files /dev/null and b/test/test_connectionpool.pyc differ diff --git a/test/test_exceptions.pyc b/test/test_exceptions.pyc new file mode 100644 index 0000000..3274e34 Binary files /dev/null and b/test/test_exceptions.pyc differ diff --git a/test/test_fields.pyc b/test/test_fields.pyc new file mode 100644 index 0000000..4622899 Binary files /dev/null and b/test/test_fields.pyc differ diff --git a/test/test_filepost.pyc b/test/test_filepost.pyc new file mode 100644 index 0000000..ec54472 Binary files /dev/null and b/test/test_filepost.pyc differ diff --git a/test/test_poolmanager.pyc b/test/test_poolmanager.pyc new file mode 100644 index 0000000..077c2ac Binary files /dev/null and b/test/test_poolmanager.pyc differ diff --git a/test/test_proxymanager.pyc b/test/test_proxymanager.pyc new file mode 100644 index 0000000..3696ee8 Binary files /dev/null and b/test/test_proxymanager.pyc differ diff --git a/test/test_response.py b/test/test_response.py index ad134ee..7d67c93 100644 --- a/test/test_response.py +++ b/test/test_response.py @@ -182,6 +182,37 @@ class TestResponse(unittest.TestCase): br.close() self.assertEqual(resp.closed, True) + b = b'fooandahalf' + fp = BytesIO(b) + resp = HTTPResponse(fp, preload_content=False) + br = BufferedReader(resp, 5) + + br.read(1) # sets up the buffer, reading 5 + self.assertEqual(len(fp.read()), len(b) - 5) + + # This is necessary to make sure the "no bytes left" part of `readinto` + # gets tested. + while not br.closed: + br.read(5) + + def test_io_readinto(self): + # This test is necessary because in py2.6, `readinto` doesn't get called + # in `test_io_bufferedreader` like it does for all the other python + # versions. 
Probably this is because the `io` module in py2.6 is an + # old version that has a different underlying implementation. + + + fp = BytesIO(b'foo') + resp = HTTPResponse(fp, preload_content=False) + + barr = bytearray(3) + assert resp.readinto(barr) == 3 + assert b'foo' == barr + + # The reader should already be empty, so this should read nothing. + assert resp.readinto(barr) == 0 + assert b'foo' == barr + def test_streaming(self): fp = BytesIO(b'foo') resp = HTTPResponse(fp, preload_content=False) diff --git a/test/test_response.pyc b/test/test_response.pyc new file mode 100644 index 0000000..99e5c0e Binary files /dev/null and b/test/test_response.pyc differ diff --git a/test/test_retry.pyc b/test/test_retry.pyc new file mode 100644 index 0000000..398c010 Binary files /dev/null and b/test/test_retry.pyc differ diff --git a/test/test_util.py b/test/test_util.py index 388d877..1811dbd 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -21,6 +21,8 @@ from urllib3.exceptions import ( InsecureRequestWarning, ) +from urllib3.util import is_fp_closed + from . 
import clear_warnings # This number represents a time in seconds, it doesn't mean anything in @@ -324,3 +326,32 @@ class TestUtil(unittest.TestCase): self.assertEqual(resolve_cert_reqs('REQUIRED'), ssl.CERT_REQUIRED) self.assertEqual(resolve_cert_reqs('CERT_REQUIRED'), ssl.CERT_REQUIRED) + def test_is_fp_closed_object_supports_closed(self): + class ClosedFile(object): + @property + def closed(self): + return True + + self.assertTrue(is_fp_closed(ClosedFile())) + + def test_is_fp_closed_object_has_none_fp(self): + class NoneFpFile(object): + @property + def fp(self): + return None + + self.assertTrue(is_fp_closed(NoneFpFile())) + + def test_is_fp_closed_object_has_fp(self): + class FpFile(object): + @property + def fp(self): + return True + + self.assertTrue(not is_fp_closed(FpFile())) + + def test_is_fp_closed_object_has_neither_fp_nor_closed(self): + class NotReallyAFile(object): + pass + + self.assertRaises(ValueError, is_fp_closed, NotReallyAFile()) diff --git a/test/test_util.pyc b/test/test_util.pyc new file mode 100644 index 0000000..0500c3b Binary files /dev/null and b/test/test_util.pyc differ diff --git a/test/with_dummyserver/__init__.py b/test/with_dummyserver/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test/with_dummyserver/__init__.pyc b/test/with_dummyserver/__init__.pyc new file mode 100644 index 0000000..833be60 Binary files /dev/null and b/test/with_dummyserver/__init__.pyc differ diff --git a/test/with_dummyserver/test_connectionpool.py b/test/with_dummyserver/test_connectionpool.py new file mode 100644 index 0000000..7d54fbf --- /dev/null +++ b/test/with_dummyserver/test_connectionpool.py @@ -0,0 +1,706 @@ +import errno +import logging +import socket +import sys +import unittest +import time + +import mock + +try: + from urllib.parse import urlencode +except: + from urllib import urlencode + +from .. 
import ( + requires_network, + onlyPy3, onlyPy27OrNewer, onlyPy26OrOlder, + TARPIT_HOST, VALID_SOURCE_ADDRESSES, INVALID_SOURCE_ADDRESSES, +) +from ..port_helpers import find_unused_port +from urllib3 import ( + encode_multipart_formdata, + HTTPConnectionPool, +) +from urllib3.exceptions import ( + ConnectTimeoutError, + EmptyPoolError, + DecodeError, + MaxRetryError, + ReadTimeoutError, + ProtocolError, +) +from urllib3.packages.six import b, u +from urllib3.util.retry import Retry +from urllib3.util.timeout import Timeout + +import tornado +from dummyserver.testcase import HTTPDummyServerTestCase + +from nose.tools import timed + +log = logging.getLogger('urllib3.connectionpool') +log.setLevel(logging.NOTSET) +log.addHandler(logging.StreamHandler(sys.stdout)) + + +class TestConnectionPool(HTTPDummyServerTestCase): + + def setUp(self): + self.pool = HTTPConnectionPool(self.host, self.port) + + def test_get(self): + r = self.pool.request('GET', '/specific_method', + fields={'method': 'GET'}) + self.assertEqual(r.status, 200, r.data) + + def test_post_url(self): + r = self.pool.request('POST', '/specific_method', + fields={'method': 'POST'}) + self.assertEqual(r.status, 200, r.data) + + def test_urlopen_put(self): + r = self.pool.urlopen('PUT', '/specific_method?method=PUT') + self.assertEqual(r.status, 200, r.data) + + def test_wrong_specific_method(self): + # To make sure the dummy server is actually returning failed responses + r = self.pool.request('GET', '/specific_method', + fields={'method': 'POST'}) + self.assertEqual(r.status, 400, r.data) + + r = self.pool.request('POST', '/specific_method', + fields={'method': 'GET'}) + self.assertEqual(r.status, 400, r.data) + + def test_upload(self): + data = "I'm in ur multipart form-data, hazing a cheezburgr" + fields = { + 'upload_param': 'filefield', + 'upload_filename': 'lolcat.txt', + 'upload_size': len(data), + 'filefield': ('lolcat.txt', data), + } + + r = self.pool.request('POST', '/upload', fields=fields) + 
self.assertEqual(r.status, 200, r.data) + + def test_one_name_multiple_values(self): + fields = [ + ('foo', 'a'), + ('foo', 'b'), + ] + + # urlencode + r = self.pool.request('GET', '/echo', fields=fields) + self.assertEqual(r.data, b'foo=a&foo=b') + + # multipart + r = self.pool.request('POST', '/echo', fields=fields) + self.assertEqual(r.data.count(b'name="foo"'), 2) + + + def test_unicode_upload(self): + fieldname = u('myfile') + filename = u('\xe2\x99\xa5.txt') + data = u('\xe2\x99\xa5').encode('utf8') + size = len(data) + + fields = { + u('upload_param'): fieldname, + u('upload_filename'): filename, + u('upload_size'): size, + fieldname: (filename, data), + } + + r = self.pool.request('POST', '/upload', fields=fields) + self.assertEqual(r.status, 200, r.data) + + def test_timeout_float(self): + url = '/sleep?seconds=0.005' + # Pool-global timeout + pool = HTTPConnectionPool(self.host, self.port, timeout=0.001, retries=False) + self.assertRaises(ReadTimeoutError, pool.request, 'GET', url) + + def test_conn_closed(self): + pool = HTTPConnectionPool(self.host, self.port, timeout=0.001, retries=False) + conn = pool._get_conn() + pool._put_conn(conn) + try: + url = '/sleep?seconds=0.005' + pool.urlopen('GET', url) + self.fail("The request should fail with a timeout error.") + except ReadTimeoutError: + if conn.sock: + self.assertRaises(socket.error, conn.sock.recv, 1024) + finally: + pool._put_conn(conn) + + def test_nagle(self): + """ Test that connections have TCP_NODELAY turned on """ + # This test needs to be here in order to be run. socket.create_connection actually tries to + # connect to the host provided so we need a dummyserver to be running. 
+ pool = HTTPConnectionPool(self.host, self.port) + conn = pool._get_conn() + pool._make_request(conn, 'GET', '/') + tcp_nodelay_setting = conn.sock.getsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY) + assert tcp_nodelay_setting > 0, ("Expected TCP_NODELAY to be set on the " + "socket (with value greater than 0) " + "but instead was %s" % + tcp_nodelay_setting) + + def test_socket_options(self): + """Test that connections accept socket options.""" + # This test needs to be here in order to be run. socket.create_connection actually tries to + # connect to the host provided so we need a dummyserver to be running. + pool = HTTPConnectionPool(self.host, self.port, socket_options=[ + (socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1) + ]) + s = pool._new_conn()._new_conn() # Get the socket + using_keepalive = s.getsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE) > 0 + self.assertTrue(using_keepalive) + s.close() + + def test_disable_default_socket_options(self): + """Test that passing None disables all socket options.""" + # This test needs to be here in order to be run. socket.create_connection actually tries to + # connect to the host provided so we need a dummyserver to be running. + pool = HTTPConnectionPool(self.host, self.port, socket_options=None) + s = pool._new_conn()._new_conn() + using_nagle = s.getsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY) == 0 + self.assertTrue(using_nagle) + s.close() + + def test_defaults_are_applied(self): + """Test that modifying the default socket options works.""" + # This test needs to be here in order to be run. socket.create_connection actually tries to + # connect to the host provided so we need a dummyserver to be running. 
+ pool = HTTPConnectionPool(self.host, self.port) + # Get the HTTPConnection instance + conn = pool._new_conn() + # Update the default socket options + conn.default_socket_options += [(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1)] + s = conn._new_conn() + nagle_disabled = s.getsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY) > 0 + using_keepalive = s.getsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE) > 0 + self.assertTrue(nagle_disabled) + self.assertTrue(using_keepalive) + + @timed(0.5) + def test_timeout(self): + """ Requests should time out when expected """ + url = '/sleep?seconds=0.002' + timeout = Timeout(read=0.001) + + # Pool-global timeout + pool = HTTPConnectionPool(self.host, self.port, timeout=timeout, retries=False) + + conn = pool._get_conn() + self.assertRaises(ReadTimeoutError, pool._make_request, + conn, 'GET', url) + pool._put_conn(conn) + + time.sleep(0.02) # Wait for server to start receiving again. :( + + self.assertRaises(ReadTimeoutError, pool.request, 'GET', url) + + # Request-specific timeouts should raise errors + pool = HTTPConnectionPool(self.host, self.port, timeout=0.1, retries=False) + + conn = pool._get_conn() + self.assertRaises(ReadTimeoutError, pool._make_request, + conn, 'GET', url, timeout=timeout) + pool._put_conn(conn) + + time.sleep(0.02) # Wait for server to start receiving again. 
:( + + self.assertRaises(ReadTimeoutError, pool.request, + 'GET', url, timeout=timeout) + + # Timeout int/float passed directly to request and _make_request should + # raise a request timeout + self.assertRaises(ReadTimeoutError, pool.request, + 'GET', url, timeout=0.001) + conn = pool._new_conn() + self.assertRaises(ReadTimeoutError, pool._make_request, conn, + 'GET', url, timeout=0.001) + pool._put_conn(conn) + + # Timeout int/float passed directly to _make_request should not raise a + # request timeout if it's a high value + pool.request('GET', url, timeout=1) + + @requires_network + @timed(0.5) + def test_connect_timeout(self): + url = '/sleep?seconds=0.005' + timeout = Timeout(connect=0.001) + + # Pool-global timeout + pool = HTTPConnectionPool(TARPIT_HOST, self.port, timeout=timeout) + conn = pool._get_conn() + self.assertRaises(ConnectTimeoutError, pool._make_request, conn, 'GET', url) + + # Retries + retries = Retry(connect=0) + self.assertRaises(MaxRetryError, pool.request, 'GET', url, + retries=retries) + + # Request-specific connection timeouts + big_timeout = Timeout(read=0.2, connect=0.2) + pool = HTTPConnectionPool(TARPIT_HOST, self.port, + timeout=big_timeout, retries=False) + conn = pool._get_conn() + self.assertRaises(ConnectTimeoutError, pool._make_request, conn, 'GET', + url, timeout=timeout) + + pool._put_conn(conn) + self.assertRaises(ConnectTimeoutError, pool.request, 'GET', url, + timeout=timeout) + + + def test_connection_error_retries(self): + """ ECONNREFUSED error should raise a connection error, with retries """ + port = find_unused_port() + pool = HTTPConnectionPool(self.host, port) + try: + pool.request('GET', '/', retries=Retry(connect=3)) + self.fail("Should have failed with a connection error.") + except MaxRetryError as e: + self.assertTrue(isinstance(e.reason, ProtocolError)) + self.assertEqual(e.reason.args[1].errno, errno.ECONNREFUSED) + + def test_timeout_reset(self): + """ If the read timeout isn't set, socket timeout should 
reset """ + url = '/sleep?seconds=0.005' + timeout = Timeout(connect=0.001) + pool = HTTPConnectionPool(self.host, self.port, timeout=timeout) + conn = pool._get_conn() + try: + pool._make_request(conn, 'GET', url) + except ReadTimeoutError: + self.fail("This request shouldn't trigger a read timeout.") + + @requires_network + @timed(5.0) + def test_total_timeout(self): + url = '/sleep?seconds=0.005' + + timeout = Timeout(connect=3, read=5, total=0.001) + pool = HTTPConnectionPool(TARPIT_HOST, self.port, timeout=timeout) + conn = pool._get_conn() + self.assertRaises(ConnectTimeoutError, pool._make_request, conn, 'GET', url) + + # This will get the socket to raise an EAGAIN on the read + timeout = Timeout(connect=3, read=0) + pool = HTTPConnectionPool(self.host, self.port, timeout=timeout) + conn = pool._get_conn() + self.assertRaises(ReadTimeoutError, pool._make_request, conn, 'GET', url) + + # The connect should succeed and this should hit the read timeout + timeout = Timeout(connect=3, read=5, total=0.002) + pool = HTTPConnectionPool(self.host, self.port, timeout=timeout) + conn = pool._get_conn() + self.assertRaises(ReadTimeoutError, pool._make_request, conn, 'GET', url) + + @requires_network + def test_none_total_applies_connect(self): + url = '/sleep?seconds=0.005' + timeout = Timeout(total=None, connect=0.001) + pool = HTTPConnectionPool(TARPIT_HOST, self.port, timeout=timeout) + conn = pool._get_conn() + self.assertRaises(ConnectTimeoutError, pool._make_request, conn, 'GET', + url) + + def test_timeout_success(self): + timeout = Timeout(connect=3, read=5, total=None) + pool = HTTPConnectionPool(self.host, self.port, timeout=timeout) + pool.request('GET', '/') + # This should not raise a "Timeout already started" error + pool.request('GET', '/') + + pool = HTTPConnectionPool(self.host, self.port, timeout=timeout) + # This should also not raise a "Timeout already started" error + pool.request('GET', '/') + + timeout = Timeout(total=None) + pool = 
HTTPConnectionPool(self.host, self.port, timeout=timeout) + pool.request('GET', '/') + + def test_tunnel(self): + # note the actual httplib.py has no tests for this functionality + timeout = Timeout(total=None) + pool = HTTPConnectionPool(self.host, self.port, timeout=timeout) + conn = pool._get_conn() + try: + conn.set_tunnel(self.host, self.port) + except AttributeError: # python 2.6 + conn._set_tunnel(self.host, self.port) + + conn._tunnel = mock.Mock(return_value=None) + pool._make_request(conn, 'GET', '/') + conn._tunnel.assert_called_once_with() + + # test that it's not called when tunnel is not set + timeout = Timeout(total=None) + pool = HTTPConnectionPool(self.host, self.port, timeout=timeout) + conn = pool._get_conn() + + conn._tunnel = mock.Mock(return_value=None) + pool._make_request(conn, 'GET', '/') + self.assertEqual(conn._tunnel.called, False) + + def test_redirect(self): + r = self.pool.request('GET', '/redirect', fields={'target': '/'}, redirect=False) + self.assertEqual(r.status, 303) + + r = self.pool.request('GET', '/redirect', fields={'target': '/'}) + self.assertEqual(r.status, 200) + self.assertEqual(r.data, b'Dummy server!') + + def test_bad_connect(self): + pool = HTTPConnectionPool('badhost.invalid', self.port) + try: + pool.request('GET', '/', retries=5) + self.fail("should raise timeout exception here") + except MaxRetryError as e: + self.assertTrue(isinstance(e.reason, ProtocolError), e.reason) + + def test_keepalive(self): + pool = HTTPConnectionPool(self.host, self.port, block=True, maxsize=1) + + r = pool.request('GET', '/keepalive?close=0') + r = pool.request('GET', '/keepalive?close=0') + + self.assertEqual(r.status, 200) + self.assertEqual(pool.num_connections, 1) + self.assertEqual(pool.num_requests, 2) + + def test_keepalive_close(self): + pool = HTTPConnectionPool(self.host, self.port, + block=True, maxsize=1, timeout=2) + + r = pool.request('GET', '/keepalive?close=1', retries=0, + headers={ + "Connection": "close", + }) + + 
self.assertEqual(pool.num_connections, 1) + + # The dummyserver will have responded with Connection:close, + # and httplib will properly cleanup the socket. + + # We grab the HTTPConnection object straight from the Queue, + # because _get_conn() is where the check & reset occurs + # pylint: disable-msg=W0212 + conn = pool.pool.get() + self.assertEqual(conn.sock, None) + pool._put_conn(conn) + + # Now with keep-alive + r = pool.request('GET', '/keepalive?close=0', retries=0, + headers={ + "Connection": "keep-alive", + }) + + # The dummyserver responded with Connection:keep-alive, the connection + # persists. + conn = pool.pool.get() + self.assertNotEqual(conn.sock, None) + pool._put_conn(conn) + + # Another request asking the server to close the connection. This one + # should get cleaned up for the next request. + r = pool.request('GET', '/keepalive?close=1', retries=0, + headers={ + "Connection": "close", + }) + + self.assertEqual(r.status, 200) + + conn = pool.pool.get() + self.assertEqual(conn.sock, None) + pool._put_conn(conn) + + # Next request + r = pool.request('GET', '/keepalive?close=0') + + def test_post_with_urlencode(self): + data = {'banana': 'hammock', 'lol': 'cat'} + r = self.pool.request('POST', '/echo', fields=data, encode_multipart=False) + self.assertEqual(r.data.decode('utf-8'), urlencode(data)) + + def test_post_with_multipart(self): + data = {'banana': 'hammock', 'lol': 'cat'} + r = self.pool.request('POST', '/echo', + fields=data, + encode_multipart=True) + body = r.data.split(b'\r\n') + + encoded_data = encode_multipart_formdata(data)[0] + expected_body = encoded_data.split(b'\r\n') + + # TODO: Get rid of extra parsing stuff when you can specify + # a custom boundary to encode_multipart_formdata + """ + We need to loop the return lines because a timestamp is attached + from within encode_multipart_formdata. 
When the server echos back + the data, it has the timestamp from when the data was encoded, which + is not equivalent to when we run encode_multipart_formdata on + the data again. + """ + for i, line in enumerate(body): + if line.startswith(b'--'): + continue + + self.assertEqual(body[i], expected_body[i]) + + def test_check_gzip(self): + r = self.pool.request('GET', '/encodingrequest', + headers={'accept-encoding': 'gzip'}) + self.assertEqual(r.headers.get('content-encoding'), 'gzip') + self.assertEqual(r.data, b'hello, world!') + + def test_check_deflate(self): + r = self.pool.request('GET', '/encodingrequest', + headers={'accept-encoding': 'deflate'}) + self.assertEqual(r.headers.get('content-encoding'), 'deflate') + self.assertEqual(r.data, b'hello, world!') + + def test_bad_decode(self): + self.assertRaises(DecodeError, self.pool.request, + 'GET', '/encodingrequest', + headers={'accept-encoding': 'garbage-deflate'}) + + self.assertRaises(DecodeError, self.pool.request, + 'GET', '/encodingrequest', + headers={'accept-encoding': 'garbage-gzip'}) + + def test_connection_count(self): + pool = HTTPConnectionPool(self.host, self.port, maxsize=1) + + pool.request('GET', '/') + pool.request('GET', '/') + pool.request('GET', '/') + + self.assertEqual(pool.num_connections, 1) + self.assertEqual(pool.num_requests, 3) + + def test_connection_count_bigpool(self): + http_pool = HTTPConnectionPool(self.host, self.port, maxsize=16) + + http_pool.request('GET', '/') + http_pool.request('GET', '/') + http_pool.request('GET', '/') + + self.assertEqual(http_pool.num_connections, 1) + self.assertEqual(http_pool.num_requests, 3) + + def test_partial_response(self): + pool = HTTPConnectionPool(self.host, self.port, maxsize=1) + + req_data = {'lol': 'cat'} + resp_data = urlencode(req_data).encode('utf-8') + + r = pool.request('GET', '/echo', fields=req_data, preload_content=False) + + self.assertEqual(r.read(5), resp_data[:5]) + self.assertEqual(r.read(), resp_data[5:]) + + def 
test_lazy_load_twice(self): + # This test is sad and confusing. Need to figure out what's + # going on with partial reads and socket reuse. + + pool = HTTPConnectionPool(self.host, self.port, block=True, maxsize=1, timeout=2) + + payload_size = 1024 * 2 + first_chunk = 512 + + boundary = 'foo' + + req_data = {'count': 'a' * payload_size} + resp_data = encode_multipart_formdata(req_data, boundary=boundary)[0] + + req2_data = {'count': 'b' * payload_size} + resp2_data = encode_multipart_formdata(req2_data, boundary=boundary)[0] + + r1 = pool.request('POST', '/echo', fields=req_data, multipart_boundary=boundary, preload_content=False) + + self.assertEqual(r1.read(first_chunk), resp_data[:first_chunk]) + + try: + r2 = pool.request('POST', '/echo', fields=req2_data, multipart_boundary=boundary, + preload_content=False, pool_timeout=0.001) + + # This branch should generally bail here, but maybe someday it will + # work? Perhaps by some sort of magic. Consider it a TODO. + + self.assertEqual(r2.read(first_chunk), resp2_data[:first_chunk]) + + self.assertEqual(r1.read(), resp_data[first_chunk:]) + self.assertEqual(r2.read(), resp2_data[first_chunk:]) + self.assertEqual(pool.num_requests, 2) + + except EmptyPoolError: + self.assertEqual(r1.read(), resp_data[first_chunk:]) + self.assertEqual(pool.num_requests, 1) + + self.assertEqual(pool.num_connections, 1) + + def test_for_double_release(self): + MAXSIZE=5 + + # Check default state + pool = HTTPConnectionPool(self.host, self.port, maxsize=MAXSIZE) + self.assertEqual(pool.num_connections, 0) + self.assertEqual(pool.pool.qsize(), MAXSIZE) + + # Make an empty slot for testing + pool.pool.get() + self.assertEqual(pool.pool.qsize(), MAXSIZE-1) + + # Check state after simple request + pool.urlopen('GET', '/') + self.assertEqual(pool.pool.qsize(), MAXSIZE-1) + + # Check state without release + pool.urlopen('GET', '/', preload_content=False) + self.assertEqual(pool.pool.qsize(), MAXSIZE-2) + + pool.urlopen('GET', '/') + 
self.assertEqual(pool.pool.qsize(), MAXSIZE-2) + + # Check state after read + pool.urlopen('GET', '/').data + self.assertEqual(pool.pool.qsize(), MAXSIZE-2) + + pool.urlopen('GET', '/') + self.assertEqual(pool.pool.qsize(), MAXSIZE-2) + + def test_release_conn_parameter(self): + MAXSIZE=5 + pool = HTTPConnectionPool(self.host, self.port, maxsize=MAXSIZE) + self.assertEqual(pool.pool.qsize(), MAXSIZE) + + # Make request without releasing connection + pool.request('GET', '/', release_conn=False, preload_content=False) + self.assertEqual(pool.pool.qsize(), MAXSIZE-1) + + def test_dns_error(self): + pool = HTTPConnectionPool('thishostdoesnotexist.invalid', self.port, timeout=0.001) + self.assertRaises(MaxRetryError, pool.request, 'GET', '/test', retries=2) + + def test_source_address(self): + for addr in VALID_SOURCE_ADDRESSES: + pool = HTTPConnectionPool(self.host, self.port, + source_address=addr, retries=False) + r = pool.request('GET', '/source_address') + assert r.data == b(addr[0]), ( + "expected the response to contain the source address {addr}, " + "but was {data}".format(data=r.data, addr=b(addr[0]))) + + def test_source_address_error(self): + for addr in INVALID_SOURCE_ADDRESSES: + pool = HTTPConnectionPool(self.host, self.port, + source_address=addr, retries=False) + self.assertRaises(ProtocolError, + pool.request, 'GET', '/source_address') + + @onlyPy3 + def test_httplib_headers_case_insensitive(self): + HEADERS = {'Content-Length': '0', 'Content-type': 'text/plain', + 'Server': 'TornadoServer/%s' % tornado.version} + r = self.pool.request('GET', '/specific_method', + fields={'method': 'GET'}) + self.assertEqual(HEADERS, dict(r.headers.items())) # to preserve case sensitivity + + +class TestRetry(HTTPDummyServerTestCase): + def setUp(self): + self.pool = HTTPConnectionPool(self.host, self.port) + + def test_max_retry(self): + try: + r = self.pool.request('GET', '/redirect', + fields={'target': '/'}, + retries=0) + self.fail("Failed to raise MaxRetryError 
exception, returned %r" % r.status) + except MaxRetryError: + pass + + def test_disabled_retry(self): + """ Disabled retries should disable redirect handling. """ + r = self.pool.request('GET', '/redirect', + fields={'target': '/'}, + retries=False) + self.assertEqual(r.status, 303) + + r = self.pool.request('GET', '/redirect', + fields={'target': '/'}, + retries=Retry(redirect=False)) + self.assertEqual(r.status, 303) + + pool = HTTPConnectionPool('thishostdoesnotexist.invalid', self.port, timeout=0.001) + self.assertRaises(ProtocolError, pool.request, 'GET', '/test', retries=False) + + def test_read_retries(self): + """ Should retry for status codes in the whitelist """ + retry = Retry(read=1, status_forcelist=[418]) + resp = self.pool.request('GET', '/successful_retry', + headers={'test-name': 'test_read_retries'}, + retries=retry) + self.assertEqual(resp.status, 200) + + def test_read_total_retries(self): + """ HTTP response w/ status code in the whitelist should be retried """ + headers = {'test-name': 'test_read_total_retries'} + retry = Retry(total=1, status_forcelist=[418]) + resp = self.pool.request('GET', '/successful_retry', + headers=headers, retries=retry) + self.assertEqual(resp.status, 200) + + def test_retries_wrong_whitelist(self): + """HTTP response w/ status code not in whitelist shouldn't be retried""" + retry = Retry(total=1, status_forcelist=[202]) + resp = self.pool.request('GET', '/successful_retry', + headers={'test-name': 'test_wrong_whitelist'}, + retries=retry) + self.assertEqual(resp.status, 418) + + def test_default_method_whitelist_retried(self): + """ urllib3 should retry methods in the default method whitelist """ + retry = Retry(total=1, status_forcelist=[418]) + resp = self.pool.request('OPTIONS', '/successful_retry', + headers={'test-name': 'test_default_whitelist'}, + retries=retry) + self.assertEqual(resp.status, 200) + + def test_retries_wrong_method_list(self): + """Method not in our whitelist should not be retried, even if 
code matches""" + headers = {'test-name': 'test_wrong_method_whitelist'} + retry = Retry(total=1, status_forcelist=[418], + method_whitelist=['POST']) + resp = self.pool.request('GET', '/successful_retry', + headers=headers, retries=retry) + self.assertEqual(resp.status, 418) + + def test_read_retries_unsuccessful(self): + headers = {'test-name': 'test_read_retries_unsuccessful'} + resp = self.pool.request('GET', '/successful_retry', + headers=headers, retries=1) + self.assertEqual(resp.status, 418) + + def test_retry_reuse_safe(self): + """ It should be possible to reuse a Retry object across requests """ + headers = {'test-name': 'test_retry_safe'} + retry = Retry(total=1, status_forcelist=[418]) + resp = self.pool.request('GET', '/successful_retry', + headers=headers, retries=retry) + self.assertEqual(resp.status, 200) + resp = self.pool.request('GET', '/successful_retry', + headers=headers, retries=retry) + self.assertEqual(resp.status, 200) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/with_dummyserver/test_connectionpool.pyc b/test/with_dummyserver/test_connectionpool.pyc new file mode 100644 index 0000000..b8c38e9 Binary files /dev/null and b/test/with_dummyserver/test_connectionpool.pyc differ diff --git a/test/with_dummyserver/test_https.py b/test/with_dummyserver/test_https.py new file mode 100644 index 0000000..cf3eee7 --- /dev/null +++ b/test/with_dummyserver/test_https.py @@ -0,0 +1,374 @@ +import datetime +import logging +import ssl +import sys +import unittest +import warnings + +import mock +from nose.plugins.skip import SkipTest + +from dummyserver.testcase import HTTPSDummyServerTestCase +from dummyserver.server import DEFAULT_CA, DEFAULT_CA_BAD, DEFAULT_CERTS + +from test import ( + onlyPy26OrOlder, + requires_network, + TARPIT_HOST, + clear_warnings, +) +from urllib3 import HTTPSConnectionPool +from urllib3.connection import ( + VerifiedHTTPSConnection, + UnverifiedHTTPSConnection, + RECENT_DATE, +) +from urllib3.exceptions 
import ( + SSLError, + ReadTimeoutError, + ConnectTimeoutError, + InsecureRequestWarning, + SystemTimeWarning, +) +from urllib3.util.timeout import Timeout + + +log = logging.getLogger('urllib3.connectionpool') +log.setLevel(logging.NOTSET) +log.addHandler(logging.StreamHandler(sys.stdout)) + + + +class TestHTTPS(HTTPSDummyServerTestCase): + def setUp(self): + self._pool = HTTPSConnectionPool(self.host, self.port) + + def test_simple(self): + r = self._pool.request('GET', '/') + self.assertEqual(r.status, 200, r.data) + + def test_set_ssl_version_to_tlsv1(self): + self._pool.ssl_version = ssl.PROTOCOL_TLSv1 + r = self._pool.request('GET', '/') + self.assertEqual(r.status, 200, r.data) + + def test_verified(self): + https_pool = HTTPSConnectionPool(self.host, self.port, + cert_reqs='CERT_REQUIRED', + ca_certs=DEFAULT_CA) + + conn = https_pool._new_conn() + self.assertEqual(conn.__class__, VerifiedHTTPSConnection) + + with mock.patch('warnings.warn') as warn: + r = https_pool.request('GET', '/') + self.assertEqual(r.status, 200) + self.assertFalse(warn.called, warn.call_args_list) + + def test_invalid_common_name(self): + https_pool = HTTPSConnectionPool('127.0.0.1', self.port, + cert_reqs='CERT_REQUIRED', + ca_certs=DEFAULT_CA) + try: + https_pool.request('GET', '/') + self.fail("Didn't raise SSL invalid common name") + except SSLError as e: + self.assertTrue("doesn't match" in str(e)) + + def test_verified_with_bad_ca_certs(self): + https_pool = HTTPSConnectionPool(self.host, self.port, + cert_reqs='CERT_REQUIRED', + ca_certs=DEFAULT_CA_BAD) + + try: + https_pool.request('GET', '/') + self.fail("Didn't raise SSL error with bad CA certs") + except SSLError as e: + self.assertTrue('certificate verify failed' in str(e), + "Expected 'certificate verify failed'," + "instead got: %r" % e) + + def test_verified_without_ca_certs(self): + # default is cert_reqs=None which is ssl.CERT_NONE + https_pool = HTTPSConnectionPool(self.host, self.port, + cert_reqs='CERT_REQUIRED') 
+ + try: + https_pool.request('GET', '/') + self.fail("Didn't raise SSL error with no CA certs when" + "CERT_REQUIRED is set") + except SSLError as e: + # there is a different error message depending on whether or + # not pyopenssl is injected + self.assertTrue('No root certificates specified' in str(e) or + 'certificate verify failed' in str(e), + "Expected 'No root certificates specified' or " + "'certificate verify failed', " + "instead got: %r" % e) + + def test_no_ssl(self): + pool = HTTPSConnectionPool(self.host, self.port) + pool.ConnectionCls = None + self.assertRaises(SSLError, pool._new_conn) + self.assertRaises(SSLError, pool.request, 'GET', '/') + + def test_unverified_ssl(self): + """ Test that bare HTTPSConnection can connect, make requests """ + pool = HTTPSConnectionPool(self.host, self.port) + pool.ConnectionCls = UnverifiedHTTPSConnection + + with mock.patch('warnings.warn') as warn: + r = pool.request('GET', '/') + self.assertEqual(r.status, 200) + self.assertTrue(warn.called) + + call, = warn.call_args_list + category = call[0][1] + self.assertEqual(category, InsecureRequestWarning) + + def test_ssl_unverified_with_ca_certs(self): + pool = HTTPSConnectionPool(self.host, self.port, + cert_reqs='CERT_NONE', + ca_certs=DEFAULT_CA_BAD) + + with mock.patch('warnings.warn') as warn: + r = pool.request('GET', '/') + self.assertEqual(r.status, 200) + self.assertTrue(warn.called) + + call, = warn.call_args_list + category = call[0][1] + self.assertEqual(category, InsecureRequestWarning) + + @requires_network + def test_ssl_verified_with_platform_ca_certs(self): + """ + We should rely on the platform CA file to validate authenticity of SSL + certificates. Since this file is used by many components of the OS, + such as curl, apt-get, etc., we decided to not touch it, in order to + not compromise the security of the OS running the test suite (typically + urllib3 developer's OS). 
+ + This test assumes that httpbin.org uses a certificate signed by a well + known Certificate Authority. + """ + try: + import urllib3.contrib.pyopenssl + except ImportError: + raise SkipTest('Test requires PyOpenSSL') + if (urllib3.connection.ssl_wrap_socket is + urllib3.contrib.pyopenssl.orig_connection_ssl_wrap_socket): + # Not patched + raise SkipTest('Test should only be run after PyOpenSSL ' + 'monkey patching') + + https_pool = HTTPSConnectionPool('httpbin.org', 443, + cert_reqs=ssl.CERT_REQUIRED) + + https_pool.request('HEAD', '/') + + def test_assert_hostname_false(self): + https_pool = HTTPSConnectionPool('127.0.0.1', self.port, + cert_reqs='CERT_REQUIRED', + ca_certs=DEFAULT_CA) + + https_pool.assert_hostname = False + https_pool.request('GET', '/') + + def test_assert_specific_hostname(self): + https_pool = HTTPSConnectionPool('127.0.0.1', self.port, + cert_reqs='CERT_REQUIRED', + ca_certs=DEFAULT_CA) + + https_pool.assert_hostname = 'localhost' + https_pool.request('GET', '/') + + def test_assert_fingerprint_md5(self): + https_pool = HTTPSConnectionPool('127.0.0.1', self.port, + cert_reqs='CERT_REQUIRED', + ca_certs=DEFAULT_CA) + + https_pool.assert_fingerprint = 'CA:84:E1:AD0E5a:ef:2f:C3:09' \ + ':E7:30:F8:CD:C8:5B' + https_pool.request('GET', '/') + + def test_assert_fingerprint_sha1(self): + https_pool = HTTPSConnectionPool('127.0.0.1', self.port, + cert_reqs='CERT_REQUIRED', + ca_certs=DEFAULT_CA) + + https_pool.assert_fingerprint = 'CC:45:6A:90:82:F7FF:C0:8218:8e:' \ + '7A:F2:8A:D7:1E:07:33:67:DE' + https_pool.request('GET', '/') + + def test_assert_invalid_fingerprint(self): + https_pool = HTTPSConnectionPool('127.0.0.1', self.port, + cert_reqs='CERT_REQUIRED', + ca_certs=DEFAULT_CA) + + https_pool.assert_fingerprint = 'AA:AA:AA:AA:AA:AAAA:AA:AAAA:AA:' \ + 'AA:AA:AA:AA:AA:AA:AA:AA:AA' + + self.assertRaises(SSLError, https_pool.request, 'GET', '/') + https_pool._get_conn() + + # Uneven length + https_pool.assert_fingerprint = 'AA:A' + 
self.assertRaises(SSLError, https_pool.request, 'GET', '/') + https_pool._get_conn() + + # Invalid length + https_pool.assert_fingerprint = 'AA' + self.assertRaises(SSLError, https_pool.request, 'GET', '/') + + def test_verify_none_and_bad_fingerprint(self): + https_pool = HTTPSConnectionPool('127.0.0.1', self.port, + cert_reqs='CERT_NONE', + ca_certs=DEFAULT_CA_BAD) + + https_pool.assert_fingerprint = 'AA:AA:AA:AA:AA:AAAA:AA:AAAA:AA:' \ + 'AA:AA:AA:AA:AA:AA:AA:AA:AA' + self.assertRaises(SSLError, https_pool.request, 'GET', '/') + + def test_verify_none_and_good_fingerprint(self): + https_pool = HTTPSConnectionPool('127.0.0.1', self.port, + cert_reqs='CERT_NONE', + ca_certs=DEFAULT_CA_BAD) + + https_pool.assert_fingerprint = 'CC:45:6A:90:82:F7FF:C0:8218:8e:' \ + '7A:F2:8A:D7:1E:07:33:67:DE' + https_pool.request('GET', '/') + + @requires_network + def test_https_timeout(self): + timeout = Timeout(connect=0.001) + https_pool = HTTPSConnectionPool(TARPIT_HOST, self.port, + timeout=timeout, retries=False, + cert_reqs='CERT_REQUIRED') + + timeout = Timeout(total=None, connect=0.001) + https_pool = HTTPSConnectionPool(TARPIT_HOST, self.port, + timeout=timeout, retries=False, + cert_reqs='CERT_REQUIRED') + self.assertRaises(ConnectTimeoutError, https_pool.request, 'GET', '/') + + timeout = Timeout(read=0.001) + https_pool = HTTPSConnectionPool(self.host, self.port, + timeout=timeout, retries=False, + cert_reqs='CERT_REQUIRED') + https_pool.ca_certs = DEFAULT_CA + https_pool.assert_fingerprint = 'CC:45:6A:90:82:F7FF:C0:8218:8e:' \ + '7A:F2:8A:D7:1E:07:33:67:DE' + url = '/sleep?seconds=0.005' + self.assertRaises(ReadTimeoutError, https_pool.request, 'GET', url) + + timeout = Timeout(total=None) + https_pool = HTTPSConnectionPool(self.host, self.port, timeout=timeout, + cert_reqs='CERT_NONE') + https_pool.request('GET', '/') + + def test_tunnel(self): + """ test the _tunnel behavior """ + timeout = Timeout(total=None) + https_pool = HTTPSConnectionPool(self.host, self.port, 
timeout=timeout, + cert_reqs='CERT_NONE') + conn = https_pool._new_conn() + try: + conn.set_tunnel(self.host, self.port) + except AttributeError: # python 2.6 + conn._set_tunnel(self.host, self.port) + conn._tunnel = mock.Mock() + https_pool._make_request(conn, 'GET', '/') + conn._tunnel.assert_called_once_with() + + @onlyPy26OrOlder + def test_tunnel_old_python(self): + """HTTPSConnection can still make connections if _tunnel_host isn't set + + The _tunnel_host attribute was added in 2.6.3 - because our test runners + generally use the latest Python 2.6, we simulate the old version by + deleting the attribute from the HTTPSConnection. + """ + conn = self._pool._new_conn() + del conn._tunnel_host + self._pool._make_request(conn, 'GET', '/') + + @requires_network + def test_enhanced_timeout(self): + def new_pool(timeout, cert_reqs='CERT_REQUIRED'): + https_pool = HTTPSConnectionPool(TARPIT_HOST, self.port, + timeout=timeout, + retries=False, + cert_reqs=cert_reqs) + return https_pool + + https_pool = new_pool(Timeout(connect=0.001)) + conn = https_pool._new_conn() + self.assertRaises(ConnectTimeoutError, https_pool.request, 'GET', '/') + self.assertRaises(ConnectTimeoutError, https_pool._make_request, conn, + 'GET', '/') + + https_pool = new_pool(Timeout(connect=5)) + self.assertRaises(ConnectTimeoutError, https_pool.request, 'GET', '/', + timeout=Timeout(connect=0.001)) + + t = Timeout(total=None) + https_pool = new_pool(t) + conn = https_pool._new_conn() + self.assertRaises(ConnectTimeoutError, https_pool.request, 'GET', '/', + timeout=Timeout(total=None, connect=0.001)) + + def test_enhanced_ssl_connection(self): + fingerprint = 'CC:45:6A:90:82:F7FF:C0:8218:8e:7A:F2:8A:D7:1E:07:33:67:DE' + + conn = VerifiedHTTPSConnection(self.host, self.port) + https_pool = HTTPSConnectionPool(self.host, self.port, + cert_reqs='CERT_REQUIRED', ca_certs=DEFAULT_CA, + assert_fingerprint=fingerprint) + + https_pool._make_request(conn, 'GET', '/') + + def 
test_ssl_correct_system_time(self): + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('always') + self._pool.request('GET', '/') + + self.assertEqual([], w) + + def test_ssl_wrong_system_time(self): + with mock.patch('urllib3.connection.datetime') as mock_date: + mock_date.date.today.return_value = datetime.date(1970, 1, 1) + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('always') + self._pool.request('GET', '/') + + self.assertEqual(len(w), 1) + warning = w[0] + + self.assertEqual(SystemTimeWarning, warning.category) + self.assertTrue(str(RECENT_DATE) in warning.message.args[0]) + + +class TestHTTPS_TLSv1(HTTPSDummyServerTestCase): + certs = DEFAULT_CERTS.copy() + certs['ssl_version'] = ssl.PROTOCOL_TLSv1 + + def setUp(self): + self._pool = HTTPSConnectionPool(self.host, self.port) + + def test_set_ssl_version_to_sslv3(self): + self._pool.ssl_version = ssl.PROTOCOL_SSLv3 + self.assertRaises(SSLError, self._pool.request, 'GET', '/') + + def test_ssl_version_as_string(self): + self._pool.ssl_version = 'PROTOCOL_SSLv3' + self.assertRaises(SSLError, self._pool.request, 'GET', '/') + + def test_ssl_version_as_short_string(self): + self._pool.ssl_version = 'SSLv3' + self.assertRaises(SSLError, self._pool.request, 'GET', '/') + + +if __name__ == '__main__': + unittest.main() diff --git a/test/with_dummyserver/test_https.pyc b/test/with_dummyserver/test_https.pyc new file mode 100644 index 0000000..6d85316 Binary files /dev/null and b/test/with_dummyserver/test_https.pyc differ diff --git a/test/with_dummyserver/test_poolmanager.py b/test/with_dummyserver/test_poolmanager.py new file mode 100644 index 0000000..52ff974 --- /dev/null +++ b/test/with_dummyserver/test_poolmanager.py @@ -0,0 +1,136 @@ +import unittest +import json + +from dummyserver.testcase import (HTTPDummyServerTestCase, + IPv6HTTPDummyServerTestCase) +from urllib3.poolmanager import PoolManager +from urllib3.connectionpool import port_by_scheme +from 
urllib3.exceptions import MaxRetryError, SSLError + + +class TestPoolManager(HTTPDummyServerTestCase): + + def setUp(self): + self.base_url = 'http://%s:%d' % (self.host, self.port) + self.base_url_alt = 'http://%s:%d' % (self.host_alt, self.port) + + def test_redirect(self): + http = PoolManager() + + r = http.request('GET', '%s/redirect' % self.base_url, + fields={'target': '%s/' % self.base_url}, + redirect=False) + + self.assertEqual(r.status, 303) + + r = http.request('GET', '%s/redirect' % self.base_url, + fields={'target': '%s/' % self.base_url}) + + self.assertEqual(r.status, 200) + self.assertEqual(r.data, b'Dummy server!') + + def test_redirect_twice(self): + http = PoolManager() + + r = http.request('GET', '%s/redirect' % self.base_url, + fields={'target': '%s/redirect' % self.base_url}, + redirect=False) + + self.assertEqual(r.status, 303) + + r = http.request('GET', '%s/redirect' % self.base_url, + fields={'target': '%s/redirect?target=%s/' % (self.base_url, self.base_url)}) + + self.assertEqual(r.status, 200) + self.assertEqual(r.data, b'Dummy server!') + + def test_redirect_to_relative_url(self): + http = PoolManager() + + r = http.request('GET', '%s/redirect' % self.base_url, + fields = {'target': '/redirect'}, + redirect = False) + + self.assertEqual(r.status, 303) + + r = http.request('GET', '%s/redirect' % self.base_url, + fields = {'target': '/redirect'}) + + self.assertEqual(r.status, 200) + self.assertEqual(r.data, b'Dummy server!') + + def test_cross_host_redirect(self): + http = PoolManager() + + cross_host_location = '%s/echo?a=b' % self.base_url_alt + try: + http.request('GET', '%s/redirect' % self.base_url, + fields={'target': cross_host_location}, + timeout=0.01, retries=0) + self.fail("Request succeeded instead of raising an exception like it should.") + + except MaxRetryError: + pass + + r = http.request('GET', '%s/redirect' % self.base_url, + fields={'target': '%s/echo?a=b' % self.base_url_alt}, + timeout=0.01, retries=1) + + 
self.assertEqual(r._pool.host, self.host_alt) + + def test_missing_port(self): + # Can a URL that lacks an explicit port like ':80' succeed, or + # will all such URLs fail with an error? + + http = PoolManager() + + # By globally adjusting `port_by_scheme` we pretend for a moment + # that HTTP's default port is not 80, but is the port at which + # our test server happens to be listening. + port_by_scheme['http'] = self.port + try: + r = http.request('GET', 'http://%s/' % self.host, retries=0) + finally: + port_by_scheme['http'] = 80 + + self.assertEqual(r.status, 200) + self.assertEqual(r.data, b'Dummy server!') + + def test_headers(self): + http = PoolManager(headers={'Foo': 'bar'}) + + r = http.request_encode_url('GET', '%s/headers' % self.base_url) + returned_headers = json.loads(r.data.decode()) + self.assertEqual(returned_headers.get('Foo'), 'bar') + + r = http.request_encode_body('POST', '%s/headers' % self.base_url) + returned_headers = json.loads(r.data.decode()) + self.assertEqual(returned_headers.get('Foo'), 'bar') + + r = http.request_encode_url('GET', '%s/headers' % self.base_url, headers={'Baz': 'quux'}) + returned_headers = json.loads(r.data.decode()) + self.assertEqual(returned_headers.get('Foo'), None) + self.assertEqual(returned_headers.get('Baz'), 'quux') + + r = http.request_encode_body('GET', '%s/headers' % self.base_url, headers={'Baz': 'quux'}) + returned_headers = json.loads(r.data.decode()) + self.assertEqual(returned_headers.get('Foo'), None) + self.assertEqual(returned_headers.get('Baz'), 'quux') + + def test_http_with_ssl_keywords(self): + http = PoolManager(ca_certs='REQUIRED') + + r = http.request('GET', 'http://%s:%s/' % (self.host, self.port)) + self.assertEqual(r.status, 200) + + +class TestIPv6PoolManager(IPv6HTTPDummyServerTestCase): + def setUp(self): + self.base_url = 'http://[%s]:%d' % (self.host, self.port) + + def test_ipv6(self): + http = PoolManager() + http.request('GET', self.base_url) + +if __name__ == '__main__': + 
unittest.main() diff --git a/test/with_dummyserver/test_poolmanager.pyc b/test/with_dummyserver/test_poolmanager.pyc new file mode 100644 index 0000000..26c52e9 Binary files /dev/null and b/test/with_dummyserver/test_poolmanager.pyc differ diff --git a/test/with_dummyserver/test_proxy_poolmanager.py b/test/with_dummyserver/test_proxy_poolmanager.py new file mode 100644 index 0000000..61eedf1 --- /dev/null +++ b/test/with_dummyserver/test_proxy_poolmanager.py @@ -0,0 +1,263 @@ +import unittest +import json +import socket + +from dummyserver.testcase import HTTPDummyProxyTestCase +from dummyserver.server import ( + DEFAULT_CA, DEFAULT_CA_BAD, get_unreachable_address) + +from urllib3.poolmanager import proxy_from_url, ProxyManager +from urllib3.exceptions import MaxRetryError, SSLError, ProxyError +from urllib3.connectionpool import connection_from_url, VerifiedHTTPSConnection + + +class TestHTTPProxyManager(HTTPDummyProxyTestCase): + + def setUp(self): + self.http_url = 'http://%s:%d' % (self.http_host, self.http_port) + self.http_url_alt = 'http://%s:%d' % (self.http_host_alt, + self.http_port) + self.https_url = 'https://%s:%d' % (self.https_host, self.https_port) + self.https_url_alt = 'https://%s:%d' % (self.https_host_alt, + self.https_port) + self.proxy_url = 'http://%s:%d' % (self.proxy_host, self.proxy_port) + + def test_basic_proxy(self): + http = proxy_from_url(self.proxy_url) + + r = http.request('GET', '%s/' % self.http_url) + self.assertEqual(r.status, 200) + + r = http.request('GET', '%s/' % self.https_url) + self.assertEqual(r.status, 200) + + def test_nagle_proxy(self): + """ Test that proxy connections do not have TCP_NODELAY turned on """ + http = proxy_from_url(self.proxy_url) + hc2 = http.connection_from_host(self.http_host, self.http_port) + conn = hc2._get_conn() + hc2._make_request(conn, 'GET', '/') + tcp_nodelay_setting = conn.sock.getsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY) + self.assertEqual(tcp_nodelay_setting, 0, + ("Expected 
TCP_NODELAY for proxies to be set " + "to zero, instead was %s" % tcp_nodelay_setting)) + + def test_proxy_conn_fail(self): + host, port = get_unreachable_address() + http = proxy_from_url('http://%s:%s/' % (host, port), retries=1) + self.assertRaises(MaxRetryError, http.request, 'GET', + '%s/' % self.https_url) + self.assertRaises(MaxRetryError, http.request, 'GET', + '%s/' % self.http_url) + + try: + http.request('GET', '%s/' % self.http_url) + self.fail("Failed to raise retry error.") + except MaxRetryError as e: + self.assertEqual(type(e.reason), ProxyError) + + def test_oldapi(self): + http = ProxyManager(connection_from_url(self.proxy_url)) + + r = http.request('GET', '%s/' % self.http_url) + self.assertEqual(r.status, 200) + + r = http.request('GET', '%s/' % self.https_url) + self.assertEqual(r.status, 200) + + def test_proxy_verified(self): + http = proxy_from_url(self.proxy_url, cert_reqs='REQUIRED', + ca_certs=DEFAULT_CA_BAD) + https_pool = http._new_pool('https', self.https_host, + self.https_port) + try: + https_pool.request('GET', '/') + self.fail("Didn't raise SSL error with wrong CA") + except SSLError as e: + self.assertTrue('certificate verify failed' in str(e), + "Expected 'certificate verify failed'," + "instead got: %r" % e) + + http = proxy_from_url(self.proxy_url, cert_reqs='REQUIRED', + ca_certs=DEFAULT_CA) + https_pool = http._new_pool('https', self.https_host, + self.https_port) + + conn = https_pool._new_conn() + self.assertEqual(conn.__class__, VerifiedHTTPSConnection) + https_pool.request('GET', '/') # Should succeed without exceptions. 
+ + http = proxy_from_url(self.proxy_url, cert_reqs='REQUIRED', + ca_certs=DEFAULT_CA) + https_fail_pool = http._new_pool('https', '127.0.0.1', self.https_port) + + try: + https_fail_pool.request('GET', '/') + self.fail("Didn't raise SSL invalid common name") + except SSLError as e: + self.assertTrue("doesn't match" in str(e)) + + def test_redirect(self): + http = proxy_from_url(self.proxy_url) + + r = http.request('GET', '%s/redirect' % self.http_url, + fields={'target': '%s/' % self.http_url}, + redirect=False) + + self.assertEqual(r.status, 303) + + r = http.request('GET', '%s/redirect' % self.http_url, + fields={'target': '%s/' % self.http_url}) + + self.assertEqual(r.status, 200) + self.assertEqual(r.data, b'Dummy server!') + + def test_cross_host_redirect(self): + http = proxy_from_url(self.proxy_url) + + cross_host_location = '%s/echo?a=b' % self.http_url_alt + try: + http.request('GET', '%s/redirect' % self.http_url, + fields={'target': cross_host_location}, + timeout=0.1, retries=0) + self.fail("We don't want to follow redirects here.") + + except MaxRetryError: + pass + + r = http.request('GET', '%s/redirect' % self.http_url, + fields={'target': '%s/echo?a=b' % self.http_url_alt}, + timeout=0.1, retries=1) + self.assertNotEqual(r._pool.host, self.http_host_alt) + + def test_cross_protocol_redirect(self): + http = proxy_from_url(self.proxy_url) + + cross_protocol_location = '%s/echo?a=b' % self.https_url + try: + http.request('GET', '%s/redirect' % self.http_url, + fields={'target': cross_protocol_location}, + timeout=0.1, retries=0) + self.fail("We don't want to follow redirects here.") + + except MaxRetryError: + pass + + r = http.request('GET', '%s/redirect' % self.http_url, + fields={'target': '%s/echo?a=b' % self.https_url}, + timeout=0.1, retries=1) + self.assertEqual(r._pool.host, self.https_host) + + def test_headers(self): + http = proxy_from_url(self.proxy_url,headers={'Foo': 'bar'}, + proxy_headers={'Hickory': 'dickory'}) + + r = 
http.request_encode_url('GET', '%s/headers' % self.http_url) + returned_headers = json.loads(r.data.decode()) + self.assertEqual(returned_headers.get('Foo'), 'bar') + self.assertEqual(returned_headers.get('Hickory'), 'dickory') + self.assertEqual(returned_headers.get('Host'), + '%s:%s'%(self.http_host,self.http_port)) + + r = http.request_encode_url('GET', '%s/headers' % self.http_url_alt) + returned_headers = json.loads(r.data.decode()) + self.assertEqual(returned_headers.get('Foo'), 'bar') + self.assertEqual(returned_headers.get('Hickory'), 'dickory') + self.assertEqual(returned_headers.get('Host'), + '%s:%s'%(self.http_host_alt,self.http_port)) + + r = http.request_encode_url('GET', '%s/headers' % self.https_url) + returned_headers = json.loads(r.data.decode()) + self.assertEqual(returned_headers.get('Foo'), 'bar') + self.assertEqual(returned_headers.get('Hickory'), None) + self.assertEqual(returned_headers.get('Host'), + '%s:%s'%(self.https_host,self.https_port)) + + r = http.request_encode_url('GET', '%s/headers' % self.https_url_alt) + returned_headers = json.loads(r.data.decode()) + self.assertEqual(returned_headers.get('Foo'), 'bar') + self.assertEqual(returned_headers.get('Hickory'), None) + self.assertEqual(returned_headers.get('Host'), + '%s:%s'%(self.https_host_alt,self.https_port)) + + r = http.request_encode_body('POST', '%s/headers' % self.http_url) + returned_headers = json.loads(r.data.decode()) + self.assertEqual(returned_headers.get('Foo'), 'bar') + self.assertEqual(returned_headers.get('Hickory'), 'dickory') + self.assertEqual(returned_headers.get('Host'), + '%s:%s'%(self.http_host,self.http_port)) + + r = http.request_encode_url('GET', '%s/headers' % self.http_url, headers={'Baz': 'quux'}) + returned_headers = json.loads(r.data.decode()) + self.assertEqual(returned_headers.get('Foo'), None) + self.assertEqual(returned_headers.get('Baz'), 'quux') + self.assertEqual(returned_headers.get('Hickory'), 'dickory') + 
self.assertEqual(returned_headers.get('Host'), + '%s:%s'%(self.http_host,self.http_port)) + + r = http.request_encode_url('GET', '%s/headers' % self.https_url, headers={'Baz': 'quux'}) + returned_headers = json.loads(r.data.decode()) + self.assertEqual(returned_headers.get('Foo'), None) + self.assertEqual(returned_headers.get('Baz'), 'quux') + self.assertEqual(returned_headers.get('Hickory'), None) + self.assertEqual(returned_headers.get('Host'), + '%s:%s'%(self.https_host,self.https_port)) + + r = http.request_encode_body('GET', '%s/headers' % self.http_url, headers={'Baz': 'quux'}) + returned_headers = json.loads(r.data.decode()) + self.assertEqual(returned_headers.get('Foo'), None) + self.assertEqual(returned_headers.get('Baz'), 'quux') + self.assertEqual(returned_headers.get('Hickory'), 'dickory') + self.assertEqual(returned_headers.get('Host'), + '%s:%s'%(self.http_host,self.http_port)) + + r = http.request_encode_body('GET', '%s/headers' % self.https_url, headers={'Baz': 'quux'}) + returned_headers = json.loads(r.data.decode()) + self.assertEqual(returned_headers.get('Foo'), None) + self.assertEqual(returned_headers.get('Baz'), 'quux') + self.assertEqual(returned_headers.get('Hickory'), None) + self.assertEqual(returned_headers.get('Host'), + '%s:%s'%(self.https_host,self.https_port)) + + def test_proxy_pooling(self): + http = proxy_from_url(self.proxy_url) + + for x in range(2): + r = http.urlopen('GET', self.http_url) + self.assertEqual(len(http.pools), 1) + + for x in range(2): + r = http.urlopen('GET', self.http_url_alt) + self.assertEqual(len(http.pools), 1) + + for x in range(2): + r = http.urlopen('GET', self.https_url) + self.assertEqual(len(http.pools), 2) + + for x in range(2): + r = http.urlopen('GET', self.https_url_alt) + self.assertEqual(len(http.pools), 3) + + def test_proxy_pooling_ext(self): + http = proxy_from_url(self.proxy_url) + hc1 = http.connection_from_url(self.http_url) + hc2 = http.connection_from_host(self.http_host, self.http_port) 
+ hc3 = http.connection_from_url(self.http_url_alt) + hc4 = http.connection_from_host(self.http_host_alt, self.http_port) + self.assertEqual(hc1,hc2) + self.assertEqual(hc2,hc3) + self.assertEqual(hc3,hc4) + + sc1 = http.connection_from_url(self.https_url) + sc2 = http.connection_from_host(self.https_host, + self.https_port,scheme='https') + sc3 = http.connection_from_url(self.https_url_alt) + sc4 = http.connection_from_host(self.https_host_alt, + self.https_port,scheme='https') + self.assertEqual(sc1,sc2) + self.assertNotEqual(sc2,sc3) + self.assertEqual(sc3,sc4) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/with_dummyserver/test_proxy_poolmanager.pyc b/test/with_dummyserver/test_proxy_poolmanager.pyc new file mode 100644 index 0000000..12c320c Binary files /dev/null and b/test/with_dummyserver/test_proxy_poolmanager.pyc differ diff --git a/test/with_dummyserver/test_socketlevel.py b/test/with_dummyserver/test_socketlevel.py new file mode 100644 index 0000000..e1ac1c6 --- /dev/null +++ b/test/with_dummyserver/test_socketlevel.py @@ -0,0 +1,544 @@ +# TODO: Break this module up into pieces. Maybe group by functionality tested +# rather than the socket level-ness of it. 
+ +from urllib3 import HTTPConnectionPool, HTTPSConnectionPool +from urllib3.poolmanager import proxy_from_url +from urllib3.exceptions import ( + MaxRetryError, + ProxyError, + ReadTimeoutError, + SSLError, + ProtocolError, +) +from urllib3.util.ssl_ import HAS_SNI +from urllib3.util.timeout import Timeout +from urllib3.util.retry import Retry + +from dummyserver.testcase import SocketDummyServerTestCase +from dummyserver.server import ( + DEFAULT_CERTS, DEFAULT_CA, get_unreachable_address) + +from nose.plugins.skip import SkipTest +from threading import Event +import socket +import ssl + + +class TestCookies(SocketDummyServerTestCase): + + def test_multi_setcookie(self): + def multicookie_response_handler(listener): + sock = listener.accept()[0] + + buf = b'' + while not buf.endswith(b'\r\n\r\n'): + buf += sock.recv(65536) + + sock.send(b'HTTP/1.1 200 OK\r\n' + b'Set-Cookie: foo=1\r\n' + b'Set-Cookie: bar=1\r\n' + b'\r\n') + sock.close() + + self._start_server(multicookie_response_handler) + pool = HTTPConnectionPool(self.host, self.port) + r = pool.request('GET', '/', retries=0) + self.assertEqual(r.headers, {'set-cookie': 'foo=1, bar=1'}) + + +class TestSNI(SocketDummyServerTestCase): + + def test_hostname_in_first_request_packet(self): + if not HAS_SNI: + raise SkipTest('SNI-support not available') + + done_receiving = Event() + self.buf = b'' + + def socket_handler(listener): + sock = listener.accept()[0] + + self.buf = sock.recv(65536) # We only accept one packet + done_receiving.set() # let the test know it can proceed + sock.close() + + self._start_server(socket_handler) + pool = HTTPSConnectionPool(self.host, self.port) + try: + pool.request('GET', '/', retries=0) + except SSLError: # We are violating the protocol + pass + done_receiving.wait() + self.assertTrue(self.host.encode() in self.buf, + "missing hostname in SSL handshake") + + +class TestSocketClosing(SocketDummyServerTestCase): + + def test_recovery_when_server_closes_connection(self): + # Does 
the pool work seamlessly if an open connection in the + # connection pool gets hung up on by the server, then reaches + # the front of the queue again? + + done_closing = Event() + + def socket_handler(listener): + for i in 0, 1: + sock = listener.accept()[0] + + buf = b'' + while not buf.endswith(b'\r\n\r\n'): + buf = sock.recv(65536) + + body = 'Response %d' % i + sock.send(('HTTP/1.1 200 OK\r\n' + 'Content-Type: text/plain\r\n' + 'Content-Length: %d\r\n' + '\r\n' + '%s' % (len(body), body)).encode('utf-8')) + + sock.close() # simulate a server timing out, closing socket + done_closing.set() # let the test know it can proceed + + self._start_server(socket_handler) + pool = HTTPConnectionPool(self.host, self.port) + + response = pool.request('GET', '/', retries=0) + self.assertEqual(response.status, 200) + self.assertEqual(response.data, b'Response 0') + + done_closing.wait() # wait until the socket in our pool gets closed + + response = pool.request('GET', '/', retries=0) + self.assertEqual(response.status, 200) + self.assertEqual(response.data, b'Response 1') + + def test_connection_refused(self): + # Does the pool retry if there is no listener on the port? + host, port = get_unreachable_address() + pool = HTTPConnectionPool(host, port) + self.assertRaises(MaxRetryError, pool.request, 'GET', '/', retries=0) + + def test_connection_read_timeout(self): + timed_out = Event() + def socket_handler(listener): + sock = listener.accept()[0] + while not sock.recv(65536).endswith(b'\r\n\r\n'): + pass + + timed_out.wait() + sock.close() + + self._start_server(socket_handler) + pool = HTTPConnectionPool(self.host, self.port, timeout=0.001, retries=False) + + try: + self.assertRaises(ReadTimeoutError, pool.request, 'GET', '/') + finally: + timed_out.set() + + def test_timeout_errors_cause_retries(self): + def socket_handler(listener): + sock_timeout = listener.accept()[0] + + # Wait for a second request before closing the first socket. 
+ sock = listener.accept()[0] + sock_timeout.close() + + # Second request. + buf = b'' + while not buf.endswith(b'\r\n\r\n'): + buf += sock.recv(65536) + + # Now respond immediately. + body = 'Response 2' + sock.send(('HTTP/1.1 200 OK\r\n' + 'Content-Type: text/plain\r\n' + 'Content-Length: %d\r\n' + '\r\n' + '%s' % (len(body), body)).encode('utf-8')) + + sock.close() + + # In situations where the main thread throws an exception, the server + # thread can hang on an accept() call. This ensures everything times + # out within 1 second. This should be long enough for any socket + # operations in the test suite to complete + default_timeout = socket.getdefaulttimeout() + socket.setdefaulttimeout(1) + + try: + self._start_server(socket_handler) + t = Timeout(connect=0.001, read=0.001) + pool = HTTPConnectionPool(self.host, self.port, timeout=t) + + response = pool.request('GET', '/', retries=1) + self.assertEqual(response.status, 200) + self.assertEqual(response.data, b'Response 2') + finally: + socket.setdefaulttimeout(default_timeout) + + def test_delayed_body_read_timeout(self): + timed_out = Event() + + def socket_handler(listener): + sock = listener.accept()[0] + buf = b'' + body = 'Hi' + while not buf.endswith(b'\r\n\r\n'): + buf = sock.recv(65536) + sock.send(('HTTP/1.1 200 OK\r\n' + 'Content-Type: text/plain\r\n' + 'Content-Length: %d\r\n' + '\r\n' % len(body)).encode('utf-8')) + + timed_out.wait() + sock.send(body.encode('utf-8')) + sock.close() + + self._start_server(socket_handler) + pool = HTTPConnectionPool(self.host, self.port) + + response = pool.urlopen('GET', '/', retries=0, preload_content=False, + timeout=Timeout(connect=1, read=0.001)) + try: + self.assertRaises(ReadTimeoutError, response.read) + finally: + timed_out.set() + + def test_incomplete_response(self): + body = 'Response' + partial_body = body[:2] + + def socket_handler(listener): + sock = listener.accept()[0] + + # Consume request + buf = b'' + while not buf.endswith(b'\r\n\r\n'): + buf = 
sock.recv(65536) + + # Send partial response and close socket. + sock.send(( + 'HTTP/1.1 200 OK\r\n' + 'Content-Type: text/plain\r\n' + 'Content-Length: %d\r\n' + '\r\n' + '%s' % (len(body), partial_body)).encode('utf-8') + ) + sock.close() + + self._start_server(socket_handler) + pool = HTTPConnectionPool(self.host, self.port) + + response = pool.request('GET', '/', retries=0, preload_content=False) + self.assertRaises(ProtocolError, response.read) + + def test_retry_weird_http_version(self): + """ Retry class should handle httplib.BadStatusLine errors properly """ + + def socket_handler(listener): + sock = listener.accept()[0] + # First request. + # Pause before responding so the first request times out. + buf = b'' + while not buf.endswith(b'\r\n\r\n'): + buf += sock.recv(65536) + + # send unknown http protocol + body = "bad http 0.5 response" + sock.send(('HTTP/0.5 200 OK\r\n' + 'Content-Type: text/plain\r\n' + 'Content-Length: %d\r\n' + '\r\n' + '%s' % (len(body), body)).encode('utf-8')) + sock.close() + + # Second request. + sock = listener.accept()[0] + buf = b'' + while not buf.endswith(b'\r\n\r\n'): + buf += sock.recv(65536) + + # Now respond immediately. + sock.send(('HTTP/1.1 200 OK\r\n' + 'Content-Type: text/plain\r\n' + 'Content-Length: %d\r\n' + '\r\n' + 'foo' % (len('foo'))).encode('utf-8')) + + sock.close() # Close the socket. 
+ + self._start_server(socket_handler) + pool = HTTPConnectionPool(self.host, self.port) + retry = Retry(read=1) + response = pool.request('GET', '/', retries=retry) + self.assertEqual(response.status, 200) + self.assertEqual(response.data, b'foo') + + + +class TestProxyManager(SocketDummyServerTestCase): + + def test_simple(self): + def echo_socket_handler(listener): + sock = listener.accept()[0] + + buf = b'' + while not buf.endswith(b'\r\n\r\n'): + buf += sock.recv(65536) + + sock.send(('HTTP/1.1 200 OK\r\n' + 'Content-Type: text/plain\r\n' + 'Content-Length: %d\r\n' + '\r\n' + '%s' % (len(buf), buf.decode('utf-8'))).encode('utf-8')) + sock.close() + + self._start_server(echo_socket_handler) + base_url = 'http://%s:%d' % (self.host, self.port) + proxy = proxy_from_url(base_url) + + r = proxy.request('GET', 'http://google.com/') + + self.assertEqual(r.status, 200) + # FIXME: The order of the headers is not predictable right now. We + # should fix that someday (maybe when we migrate to + # OrderedDict/MultiDict). + self.assertEqual(sorted(r.data.split(b'\r\n')), + sorted([ + b'GET http://google.com/ HTTP/1.1', + b'Host: google.com', + b'Accept-Encoding: identity', + b'Accept: */*', + b'', + b'', + ])) + + def test_headers(self): + def echo_socket_handler(listener): + sock = listener.accept()[0] + + buf = b'' + while not buf.endswith(b'\r\n\r\n'): + buf += sock.recv(65536) + + sock.send(('HTTP/1.1 200 OK\r\n' + 'Content-Type: text/plain\r\n' + 'Content-Length: %d\r\n' + '\r\n' + '%s' % (len(buf), buf.decode('utf-8'))).encode('utf-8')) + sock.close() + + self._start_server(echo_socket_handler) + base_url = 'http://%s:%d' % (self.host, self.port) + + # Define some proxy headers. 
+ proxy_headers = {'For The Proxy': 'YEAH!'} + proxy = proxy_from_url(base_url, proxy_headers=proxy_headers) + + conn = proxy.connection_from_url('http://www.google.com/') + + r = conn.urlopen('GET', 'http://www.google.com/', assert_same_host=False) + + self.assertEqual(r.status, 200) + # FIXME: The order of the headers is not predictable right now. We + # should fix that someday (maybe when we migrate to + # OrderedDict/MultiDict). + self.assertTrue(b'For The Proxy: YEAH!\r\n' in r.data) + + def test_retries(self): + def echo_socket_handler(listener): + sock = listener.accept()[0] + # First request, which should fail + sock.close() + + # Second request + sock = listener.accept()[0] + + buf = b'' + while not buf.endswith(b'\r\n\r\n'): + buf += sock.recv(65536) + + sock.send(('HTTP/1.1 200 OK\r\n' + 'Content-Type: text/plain\r\n' + 'Content-Length: %d\r\n' + '\r\n' + '%s' % (len(buf), buf.decode('utf-8'))).encode('utf-8')) + sock.close() + + self._start_server(echo_socket_handler) + base_url = 'http://%s:%d' % (self.host, self.port) + + proxy = proxy_from_url(base_url) + conn = proxy.connection_from_url('http://www.google.com') + + r = conn.urlopen('GET', 'http://www.google.com', + assert_same_host=False, retries=1) + self.assertEqual(r.status, 200) + + self.assertRaises(ProxyError, conn.urlopen, 'GET', + 'http://www.google.com', + assert_same_host=False, retries=False) + + def test_connect_reconn(self): + def proxy_ssl_one(listener): + sock = listener.accept()[0] + + buf = b'' + while not buf.endswith(b'\r\n\r\n'): + buf += sock.recv(65536) + s = buf.decode('utf-8') + if not s.startswith('CONNECT '): + sock.send(('HTTP/1.1 405 Method not allowed\r\n' + 'Allow: CONNECT\r\n\r\n').encode('utf-8')) + sock.close() + return + + if not s.startswith('CONNECT %s:443' % (self.host,)): + sock.send(('HTTP/1.1 403 Forbidden\r\n\r\n').encode('utf-8')) + sock.close() + return + + sock.send(('HTTP/1.1 200 Connection Established\r\n\r\n').encode('utf-8')) + ssl_sock = 
ssl.wrap_socket(sock, + server_side=True, + keyfile=DEFAULT_CERTS['keyfile'], + certfile=DEFAULT_CERTS['certfile'], + ca_certs=DEFAULT_CA) + + buf = b'' + while not buf.endswith(b'\r\n\r\n'): + buf += ssl_sock.recv(65536) + + ssl_sock.send(('HTTP/1.1 200 OK\r\n' + 'Content-Type: text/plain\r\n' + 'Content-Length: 2\r\n' + 'Connection: close\r\n' + '\r\n' + 'Hi').encode('utf-8')) + ssl_sock.close() + def echo_socket_handler(listener): + proxy_ssl_one(listener) + proxy_ssl_one(listener) + + self._start_server(echo_socket_handler) + base_url = 'http://%s:%d' % (self.host, self.port) + + proxy = proxy_from_url(base_url) + + url = 'https://{0}'.format(self.host) + conn = proxy.connection_from_url(url) + r = conn.urlopen('GET', url, retries=0) + self.assertEqual(r.status, 200) + r = conn.urlopen('GET', url, retries=0) + self.assertEqual(r.status, 200) + + +class TestSSL(SocketDummyServerTestCase): + + def test_ssl_failure_midway_through_conn(self): + def socket_handler(listener): + sock = listener.accept()[0] + sock2 = sock.dup() + ssl_sock = ssl.wrap_socket(sock, + server_side=True, + keyfile=DEFAULT_CERTS['keyfile'], + certfile=DEFAULT_CERTS['certfile'], + ca_certs=DEFAULT_CA) + + buf = b'' + while not buf.endswith(b'\r\n\r\n'): + buf += ssl_sock.recv(65536) + + # Deliberately send from the non-SSL socket. 
+ sock2.send(( + 'HTTP/1.1 200 OK\r\n' + 'Content-Type: text/plain\r\n' + 'Content-Length: 2\r\n' + '\r\n' + 'Hi').encode('utf-8')) + sock2.close() + ssl_sock.close() + + self._start_server(socket_handler) + pool = HTTPSConnectionPool(self.host, self.port) + + self.assertRaises(SSLError, pool.request, 'GET', '/', retries=0) + + def test_ssl_read_timeout(self): + timed_out = Event() + + def socket_handler(listener): + sock = listener.accept()[0] + ssl_sock = ssl.wrap_socket(sock, + server_side=True, + keyfile=DEFAULT_CERTS['keyfile'], + certfile=DEFAULT_CERTS['certfile'], + ca_certs=DEFAULT_CA) + + buf = b'' + while not buf.endswith(b'\r\n\r\n'): + buf += ssl_sock.recv(65536) + + # Send incomplete message (note Content-Length) + ssl_sock.send(( + 'HTTP/1.1 200 OK\r\n' + 'Content-Type: text/plain\r\n' + 'Content-Length: 10\r\n' + '\r\n' + 'Hi-').encode('utf-8')) + timed_out.wait() + + sock.close() + ssl_sock.close() + + self._start_server(socket_handler) + pool = HTTPSConnectionPool(self.host, self.port) + + response = pool.urlopen('GET', '/', retries=0, preload_content=False, + timeout=Timeout(connect=1, read=0.001)) + try: + self.assertRaises(ReadTimeoutError, response.read) + finally: + timed_out.set() + + +def consume_socket(sock, chunks=65536): + while not sock.recv(chunks).endswith(b'\r\n\r\n'): + pass + + +def create_response_handler(response, num=1): + def socket_handler(listener): + for _ in range(num): + sock = listener.accept()[0] + consume_socket(sock) + + sock.send(response) + sock.close() + + return socket_handler + + +class TestErrorWrapping(SocketDummyServerTestCase): + + def test_bad_statusline(self): + handler = create_response_handler( + b'HTTP/1.1 Omg What Is This?\r\n' + b'Content-Length: 0\r\n' + b'\r\n' + ) + self._start_server(handler) + pool = HTTPConnectionPool(self.host, self.port, retries=False) + self.assertRaises(ProtocolError, pool.request, 'GET', '/') + + def test_unknown_protocol(self): + handler = create_response_handler( + 
b'HTTP/1000 200 OK\r\n' + b'Content-Length: 0\r\n' + b'\r\n' + ) + self._start_server(handler) + pool = HTTPConnectionPool(self.host, self.port, retries=False) + self.assertRaises(ProtocolError, pool.request, 'GET', '/') diff --git a/test/with_dummyserver/test_socketlevel.pyc b/test/with_dummyserver/test_socketlevel.pyc new file mode 100644 index 0000000..ba3b19e Binary files /dev/null and b/test/with_dummyserver/test_socketlevel.pyc differ diff --git a/urllib3.egg-info/PKG-INFO b/urllib3.egg-info/PKG-INFO index 168944c..964cd4b 100644 --- a/urllib3.egg-info/PKG-INFO +++ b/urllib3.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 1.1 Name: urllib3 -Version: 1.9 +Version: 1.9.1 Summary: HTTP library with thread-safe connection pooling, file post, and more. Home-page: http://urllib3.readthedocs.org/ Author: Andrey Petrov @@ -13,6 +13,9 @@ Description: ======= .. image:: https://travis-ci.org/shazow/urllib3.png?branch=master :target: https://travis-ci.org/shazow/urllib3 + .. image:: https://www.bountysource.com/badge/tracker?tracker_id=192525 + :target: https://www.bountysource.com/trackers/192525-urllib3?utm_source=192525&utm_medium=shield&utm_campaign=TRACKER_BADGE + Highlights ========== @@ -153,6 +156,27 @@ Description: ======= Changes ======= + 1.9.1 (2014-09-13) + ++++++++++++++++++ + + * Apply socket arguments before binding. (Issue #427) + + * More careful checks if fp-like object is closed. (Issue #435) + + * Fixed packaging issues of some development-related files not + getting included. (Issue #440) + + * Allow performing *only* fingerprint verification. (Issue #444) + + * Emit ``SecurityWarning`` if system clock is waaay off. (Issue #445) + + * Fixed PyOpenSSL compatibility with PyPy. (Issue #450) + + * Fixed ``BrokenPipeError`` and ``ConnectionError`` handling in Py3. 
+ (Issue #443) + + + 1.9 (2014-07-04) ++++++++++++++++ diff --git a/urllib3.egg-info/SOURCES.txt b/urllib3.egg-info/SOURCES.txt index e0b9ddd..2f0b5fc 100644 --- a/urllib3.egg-info/SOURCES.txt +++ b/urllib3.egg-info/SOURCES.txt @@ -7,11 +7,29 @@ README.rst dev-requirements.txt setup.cfg setup.py +docs/Makefile +docs/README +docs/collections.rst +docs/conf.py +docs/contrib.rst +docs/doc-requirements.txt +docs/exceptions.rst +docs/helpers.rst +docs/index.rst +docs/make.bat +docs/managers.rst +docs/pools.rst +docs/security.rst dummyserver/__init__.py +dummyserver/__init__.pyc dummyserver/handlers.py +dummyserver/handlers.pyc dummyserver/proxy.py +dummyserver/proxy.pyc dummyserver/server.py +dummyserver/server.pyc dummyserver/testcase.py +dummyserver/testcase.pyc dummyserver/certs/cacert.key dummyserver/certs/cacert.pem dummyserver/certs/client.csr @@ -22,17 +40,49 @@ dummyserver/certs/server.crt dummyserver/certs/server.csr dummyserver/certs/server.key dummyserver/certs/server.key.org +test/__init__.py +test/__init__.pyc +test/benchmark.py +test/port_helpers.py +test/port_helpers.pyc test/test_collections.py +test/test_collections.pyc test/test_compatibility.py +test/test_compatibility.pyc test/test_connectionpool.py +test/test_connectionpool.pyc test/test_exceptions.py +test/test_exceptions.pyc test/test_fields.py +test/test_fields.pyc test/test_filepost.py +test/test_filepost.pyc test/test_poolmanager.py +test/test_poolmanager.pyc test/test_proxymanager.py +test/test_proxymanager.pyc test/test_response.py +test/test_response.pyc test/test_retry.py +test/test_retry.pyc test/test_util.py +test/test_util.pyc +test/contrib/__init__.py +test/contrib/__init__.pyc +test/contrib/test_pyopenssl.py +test/contrib/test_pyopenssl.pyc +test/with_dummyserver/__init__.py +test/with_dummyserver/__init__.pyc +test/with_dummyserver/test_connectionpool.py +test/with_dummyserver/test_connectionpool.pyc +test/with_dummyserver/test_https.py +test/with_dummyserver/test_https.pyc 
+test/with_dummyserver/test_poolmanager.py +test/with_dummyserver/test_poolmanager.pyc +test/with_dummyserver/test_proxy_poolmanager.py +test/with_dummyserver/test_proxy_poolmanager.pyc +test/with_dummyserver/test_socketlevel.py +test/with_dummyserver/test_socketlevel.pyc urllib3/__init__.py urllib3/_collections.py urllib3/connection.py diff --git a/urllib3/__init__.py b/urllib3/__init__.py index 56f5bf4..3546d13 100644 --- a/urllib3/__init__.py +++ b/urllib3/__init__.py @@ -4,7 +4,7 @@ urllib3 - Thread-safe connection pooling and re-using. __author__ = 'Andrey Petrov (andrey.petrov@shazow.net)' __license__ = 'MIT' -__version__ = '1.9' +__version__ = '1.9.1' from .connectionpool import ( @@ -57,7 +57,7 @@ del NullHandler # Set security warning to only go off once by default. import warnings -warnings.simplefilter('module', exceptions.InsecureRequestWarning) +warnings.simplefilter('module', exceptions.SecurityWarning) def disable_warnings(category=exceptions.HTTPWarning): """ diff --git a/urllib3/connection.py b/urllib3/connection.py index 0d578d7..cebdd86 100644 --- a/urllib3/connection.py +++ b/urllib3/connection.py @@ -1,6 +1,9 @@ +import datetime import sys import socket from socket import timeout as SocketTimeout +import warnings +from .packages import six try: # Python 3 from http.client import HTTPConnection as _HTTPConnection, HTTPException @@ -24,11 +27,19 @@ except (ImportError, AttributeError): # Platform-specific: No SSL. pass +try: # Python 3: + # Not a no-op, we're adding this to the namespace so it can be imported. 
+ ConnectionError = ConnectionError +except NameError: # Python 2: + class ConnectionError(Exception): + pass + + from .exceptions import ( ConnectTimeoutError, + SystemTimeWarning, ) from .packages.ssl_match_hostname import match_hostname -from .packages import six from .util.ssl_ import ( resolve_cert_reqs, @@ -37,14 +48,16 @@ from .util.ssl_ import ( assert_fingerprint, ) -from .util import connection +from .util import connection port_by_scheme = { 'http': 80, 'https': 443, } +RECENT_DATE = datetime.date(2014, 1, 1) + class HTTPConnection(_HTTPConnection, object): """ @@ -172,6 +185,7 @@ class VerifiedHTTPSConnection(HTTPSConnection): cert_reqs = None ca_certs = None ssl_version = None + assert_fingerprint = None def set_cert(self, key_file=None, cert_file=None, cert_reqs=None, ca_certs=None, @@ -206,6 +220,14 @@ class VerifiedHTTPSConnection(HTTPSConnection): # Override the host with the one we're requesting data from. hostname = self._tunnel_host + is_time_off = datetime.date.today() < RECENT_DATE + if is_time_off: + warnings.warn(( + 'System time is way off (before {0}). 
This will probably ' + 'lead to SSL verification errors').format(RECENT_DATE), + SystemTimeWarning + ) + # Wrap socket using verification with the root certs in # trusted_root_certs self.sock = ssl_wrap_socket(conn, self.key_file, self.cert_file, @@ -214,15 +236,16 @@ class VerifiedHTTPSConnection(HTTPSConnection): server_hostname=hostname, ssl_version=resolved_ssl_version) - if resolved_cert_reqs != ssl.CERT_NONE: - if self.assert_fingerprint: - assert_fingerprint(self.sock.getpeercert(binary_form=True), - self.assert_fingerprint) - elif self.assert_hostname is not False: - match_hostname(self.sock.getpeercert(), - self.assert_hostname or hostname) + if self.assert_fingerprint: + assert_fingerprint(self.sock.getpeercert(binary_form=True), + self.assert_fingerprint) + elif resolved_cert_reqs != ssl.CERT_NONE \ + and self.assert_hostname is not False: + match_hostname(self.sock.getpeercert(), + self.assert_hostname or hostname) - self.is_verified = resolved_cert_reqs == ssl.CERT_REQUIRED + self.is_verified = (resolved_cert_reqs == ssl.CERT_REQUIRED + or self.assert_fingerprint is not None) if ssl: diff --git a/urllib3/connectionpool.py b/urllib3/connectionpool.py index 9317fdc..ac6e0ca 100644 --- a/urllib3/connectionpool.py +++ b/urllib3/connectionpool.py @@ -32,7 +32,7 @@ from .connection import ( port_by_scheme, DummyConnection, HTTPConnection, HTTPSConnection, VerifiedHTTPSConnection, - HTTPException, BaseSSLError, + HTTPException, BaseSSLError, ConnectionError ) from .request import RequestMethods from .response import HTTPResponse @@ -542,7 +542,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): release_conn = True raise SSLError(e) - except (TimeoutError, HTTPException, SocketError) as e: + except (TimeoutError, HTTPException, SocketError, ConnectionError) as e: if conn: # Discard the connection for these exceptions. It will be # be replaced during the next _get_conn() call. 
@@ -718,7 +718,7 @@ class HTTPSConnectionPool(HTTPConnectionPool): super(HTTPSConnectionPool, self)._validate_conn(conn) # Force connect early to allow us to validate the connection. - if not conn.sock: + if not getattr(conn, 'sock', None): # AppEngine might not have `.sock` conn.connect() if not conn.is_verified: diff --git a/urllib3/contrib/pyopenssl.py b/urllib3/contrib/pyopenssl.py index 7a9ea2e..8475eeb 100644 --- a/urllib3/contrib/pyopenssl.py +++ b/urllib3/contrib/pyopenssl.py @@ -29,7 +29,7 @@ Now you can use :mod:`urllib3` as you normally would, and it will support SNI when the required modules are installed. Activating this module also has the positive side effect of disabling SSL/TLS -encryption in Python 2 (see `CRIME attack`_). +compression in Python 2 (see `CRIME attack`_). If you want to configure the default list of supported cipher suites, you can set the ``urllib3.contrib.pyopenssl.DEFAULT_SSL_CIPHER_LIST`` variable. @@ -46,8 +46,12 @@ Module Variables ''' -from ndg.httpsclient.ssl_peer_verification import SUBJ_ALT_NAME_SUPPORT -from ndg.httpsclient.subj_alt_name import SubjectAltName as BaseSubjectAltName +try: + from ndg.httpsclient.ssl_peer_verification import SUBJ_ALT_NAME_SUPPORT + from ndg.httpsclient.subj_alt_name import SubjectAltName as BaseSubjectAltName +except SyntaxError as e: + raise ImportError(e) + import OpenSSL.SSL from pyasn1.codec.der import decoder as der_decoder from pyasn1.type import univ, constraint @@ -155,18 +159,24 @@ def get_subj_alt_name(peer_cert): class WrappedSocket(object): - '''API-compatibility wrapper for Python OpenSSL's Connection-class.''' + '''API-compatibility wrapper for Python OpenSSL's Connection-class. + + Note: _makefile_refs, _drop() and _reuse() are needed for the garbage + collector of pypy. 
+ ''' def __init__(self, connection, socket, suppress_ragged_eofs=True): self.connection = connection self.socket = socket self.suppress_ragged_eofs = suppress_ragged_eofs + self._makefile_refs = 0 def fileno(self): return self.socket.fileno() def makefile(self, mode, bufsize=-1): - return _fileobject(self, mode, bufsize) + self._makefile_refs += 1 + return _fileobject(self, mode, bufsize, close=True) def recv(self, *args, **kwargs): try: @@ -180,7 +190,7 @@ class WrappedSocket(object): rd, wd, ed = select.select( [self.socket], [], [], self.socket.gettimeout()) if not rd: - raise timeout() + raise timeout('The read operation timed out') else: return self.recv(*args, **kwargs) else: @@ -193,7 +203,10 @@ class WrappedSocket(object): return self.connection.sendall(data) def close(self): - return self.connection.shutdown() + if self._makefile_refs < 1: + return self.connection.shutdown() + else: + self._makefile_refs -= 1 def getpeercert(self, binary_form=False): x509 = self.connection.get_peer_certificate() @@ -216,6 +229,15 @@ class WrappedSocket(object): ] } + def _reuse(self): + self._makefile_refs += 1 + + def _drop(self): + if self._makefile_refs < 1: + self.close() + else: + self._makefile_refs -= 1 + def _verify_callback(cnx, x509, err_no, err_depth, return_code): return err_no == 0 diff --git a/urllib3/exceptions.py b/urllib3/exceptions.py index fff8bfa..7519ba9 100644 --- a/urllib3/exceptions.py +++ b/urllib3/exceptions.py @@ -60,7 +60,14 @@ ConnectionError = ProtocolError ## Leaf Exceptions class MaxRetryError(RequestError): - "Raised when the maximum number of retries is exceeded." + """Raised when the maximum number of retries is exceeded. 
+ + :param pool: The connection pool + :type pool: :class:`~urllib3.connectionpool.HTTPConnectionPool` + :param string url: The requested Url + :param exceptions.Exception reason: The underlying error + + """ def __init__(self, pool, url, reason=None): self.reason = reason @@ -134,6 +141,16 @@ class LocationParseError(LocationValueError): self.location = location -class InsecureRequestWarning(HTTPWarning): +class SecurityWarning(HTTPWarning): + "Warned when perfoming security reducing actions" + pass + + +class InsecureRequestWarning(SecurityWarning): "Warned when making an unverified HTTPS request." pass + + +class SystemTimeWarning(SecurityWarning): + "Warned when system time is suspected to be wrong" + pass diff --git a/urllib3/response.py b/urllib3/response.py index 7e0d47f..e69de95 100644 --- a/urllib3/response.py +++ b/urllib3/response.py @@ -48,7 +48,10 @@ class HTTPResponse(io.IOBase): HTTP Response container. Backwards-compatible to httplib's HTTPResponse but the response ``body`` is - loaded and decoded on-demand when the ``data`` property is accessed. + loaded and decoded on-demand when the ``data`` property is accessed. This + class is also compatible with the Python standard library's :mod:`io` + module, and can hence be treated as a readable object in the context of that + framework. Extra parameters for behaviour not present in httplib.HTTPResponse: @@ -317,4 +320,14 @@ class HTTPResponse(io.IOBase): return self._fp.flush() def readable(self): + # This method is required for `io` module compatibility. return True + + def readinto(self, b): + # This method is required for `io` module compatibility. 
+ temp = self.read(len(b)) + if len(temp) == 0: + return 0 + else: + b[:len(temp)] = temp + return len(temp) diff --git a/urllib3/util/connection.py b/urllib3/util/connection.py index 062ee9d..2156993 100644 --- a/urllib3/util/connection.py +++ b/urllib3/util/connection.py @@ -66,13 +66,15 @@ def create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, sock = None try: sock = socket.socket(af, socktype, proto) + + # If provided, set socket level options before connecting. + # This is the only addition urllib3 makes to this function. + _set_socket_options(sock, socket_options) + if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT: sock.settimeout(timeout) if source_address: sock.bind(source_address) - # If provided, set socket level options before connecting. - # This is the only addition urllib3 makes to this function. - _set_socket_options(sock, socket_options) sock.connect(sa) return sock diff --git a/urllib3/util/response.py b/urllib3/util/response.py index d0325bc..45fff55 100644 --- a/urllib3/util/response.py +++ b/urllib3/util/response.py @@ -5,9 +5,18 @@ def is_fp_closed(obj): :param obj: The file-like object to check. """ - if hasattr(obj, 'fp'): - # Object is a container for another file-like object that gets released - # on exhaustion (e.g. HTTPResponse) + + try: + # Check via the official file-like-object way. + return obj.closed + except AttributeError: + pass + + try: + # Check if the object is a container for another file-like object that + # gets released on exhaustion (e.g. HTTPResponse). return obj.fp is None + except AttributeError: + pass - return obj.closed + raise ValueError("Unable to determine whether fp is closed.") diff --git a/urllib3/util/retry.py b/urllib3/util/retry.py index 9013197..eb560df 100644 --- a/urllib3/util/retry.py +++ b/urllib3/util/retry.py @@ -83,7 +83,7 @@ class Retry(object): same state). See :attr:`Retry.DEFAULT_METHOD_WHITELIST`. 
:param iterable status_forcelist: - A set of HTTP status codes that we should force a retry on. + A set of HTTP status codes that we should force a retry on. By default, this is disabled with ``None``. -- cgit v1.2.3 From b6ab7bae87b22c6fae783e8850533219d3bf8a29 Mon Sep 17 00:00:00 2001 From: SVN-Git Migration Date: Thu, 8 Oct 2015 13:19:41 -0700 Subject: Imported Upstream version 1.10 --- CHANGES.rst | 32 ++++ CONTRIBUTORS.txt | 6 + PKG-INFO | 34 +++- dev-requirements.txt | 2 + docs/security.rst | 1 - dummyserver/__init__.pyc | Bin 141 -> 0 bytes dummyserver/certs/README.rst | 24 +++ dummyserver/certs/cacert.no_san.pem | 31 ++++ dummyserver/certs/server.no_san.crt | 16 ++ dummyserver/certs/server.no_san.csr | 12 ++ dummyserver/handlers.pyc | Bin 9742 -> 0 bytes dummyserver/proxy.pyc | Bin 4740 -> 0 bytes dummyserver/server.py | 19 +++ dummyserver/server.pyc | Bin 5992 -> 0 bytes dummyserver/testcase.pyc | Bin 5450 -> 0 bytes test/__init__.pyc | Bin 3946 -> 0 bytes test/contrib/__init__.pyc | Bin 142 -> 0 bytes test/contrib/test_pyopenssl.pyc | Bin 1143 -> 0 bytes test/port_helpers.pyc | Bin 5719 -> 0 bytes test/test_collections.pyc | Bin 6842 -> 0 bytes test/test_compatibility.pyc | Bin 1372 -> 0 bytes test/test_connectionpool.py | 2 +- test/test_connectionpool.pyc | Bin 8862 -> 0 bytes test/test_exceptions.pyc | Bin 1931 -> 0 bytes test/test_fields.pyc | Bin 2739 -> 0 bytes test/test_filepost.pyc | Bin 4916 -> 0 bytes test/test_poolmanager.pyc | Bin 2499 -> 0 bytes test/test_proxymanager.pyc | Bin 1670 -> 0 bytes test/test_response.pyc | Bin 14619 -> 0 bytes test/test_retry.py | 44 ++++- test/test_retry.pyc | Bin 6491 -> 0 bytes test/test_util.py | 121 ++++++++----- test/test_util.pyc | Bin 15036 -> 0 bytes test/with_dummyserver/__init__.pyc | Bin 151 -> 0 bytes test/with_dummyserver/test_connectionpool.py | 12 +- test/with_dummyserver/test_connectionpool.pyc | Bin 27640 -> 0 bytes test/with_dummyserver/test_https.py | 36 +++- test/with_dummyserver/test_https.pyc 
| Bin 15651 -> 0 bytes test/with_dummyserver/test_poolmanager.pyc | Bin 5591 -> 0 bytes test/with_dummyserver/test_proxy_poolmanager.py | 28 ++- test/with_dummyserver/test_proxy_poolmanager.pyc | Bin 9891 -> 0 bytes test/with_dummyserver/test_socketlevel.py | 18 ++ test/with_dummyserver/test_socketlevel.pyc | Bin 18715 -> 0 bytes urllib3.egg-info/PKG-INFO | 34 +++- urllib3.egg-info/SOURCES.txt | 30 +--- urllib3/__init__.py | 6 +- urllib3/_collections.py | 7 +- urllib3/connection.py | 12 +- urllib3/connectionpool.py | 103 ++++++----- urllib3/contrib/pyopenssl.py | 28 ++- urllib3/exceptions.py | 13 +- urllib3/request.py | 28 +-- urllib3/util/retry.py | 26 +-- urllib3/util/ssl_.py | 208 ++++++++++++++++++----- urllib3/util/url.py | 45 ++++- 55 files changed, 774 insertions(+), 204 deletions(-) delete mode 100644 dummyserver/__init__.pyc create mode 100644 dummyserver/certs/README.rst create mode 100644 dummyserver/certs/cacert.no_san.pem create mode 100644 dummyserver/certs/server.no_san.crt create mode 100644 dummyserver/certs/server.no_san.csr delete mode 100644 dummyserver/handlers.pyc delete mode 100644 dummyserver/proxy.pyc delete mode 100644 dummyserver/server.pyc delete mode 100644 dummyserver/testcase.pyc delete mode 100644 test/__init__.pyc delete mode 100644 test/contrib/__init__.pyc delete mode 100644 test/contrib/test_pyopenssl.pyc delete mode 100644 test/port_helpers.pyc delete mode 100644 test/test_collections.pyc delete mode 100644 test/test_compatibility.pyc delete mode 100644 test/test_connectionpool.pyc delete mode 100644 test/test_exceptions.pyc delete mode 100644 test/test_fields.pyc delete mode 100644 test/test_filepost.pyc delete mode 100644 test/test_poolmanager.pyc delete mode 100644 test/test_proxymanager.pyc delete mode 100644 test/test_response.pyc delete mode 100644 test/test_retry.pyc delete mode 100644 test/test_util.pyc delete mode 100644 test/with_dummyserver/__init__.pyc delete mode 100644 test/with_dummyserver/test_connectionpool.pyc 
delete mode 100644 test/with_dummyserver/test_https.pyc delete mode 100644 test/with_dummyserver/test_poolmanager.pyc delete mode 100644 test/with_dummyserver/test_proxy_poolmanager.pyc delete mode 100644 test/with_dummyserver/test_socketlevel.pyc diff --git a/CHANGES.rst b/CHANGES.rst index dd2cd2d..552d9b7 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,38 @@ Changes ======= +1.10 (2014-12-14) ++++++++++++++++++ + +* Disabled SSLv3. (Issue #473) + +* Add ``Url.url`` property to return the composed url string. (Issue #394) + +* Fixed PyOpenSSL + gevent ``WantWriteError``. (Issue #412) + +* ``MaxRetryError.reason`` will always be an exception, not string. + (Issue #481) + +* Fixed SSL-related timeouts not being detected as timeouts. (Issue #492) + +* Py3: Use ``ssl.create_default_context()`` when available. (Issue #473) + +* Emit ``InsecureRequestWarning`` for *every* insecure HTTPS request. + (Issue #496) + +* Emit ``SecurityWarning`` when certificate has no ``subjectAltName``. + (Issue #499) + +* Close and discard sockets which experienced SSL-related errors. + (Issue #501) + +* Handle ``body`` param in ``.request(...)``. (Issue #513) + +* Respect timeout with HTTPS proxy. (Issue #505) + +* PyOpenSSL: Handle ZeroReturnError exception. (Issue #520) + + 1.9.1 (2014-09-13) ++++++++++++++++++ diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt index 97f3014..ecaf9bb 100644 --- a/CONTRIBUTORS.txt +++ b/CONTRIBUTORS.txt @@ -127,5 +127,11 @@ In chronological order: * Krishna Prasad * Google App Engine documentation +* Aaron Meurer + * Added Url.url, which unparses a Url + +* Evgeny Kapun + * Bugfixes + * [Your name or handle] <[email or website]> * [Brief summary of your changes] diff --git a/PKG-INFO b/PKG-INFO index 964cd4b..7b5cf18 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 1.1 Name: urllib3 -Version: 1.9.1 +Version: 1.10 Summary: HTTP library with thread-safe connection pooling, file post, and more. 
Home-page: http://urllib3.readthedocs.org/ Author: Andrey Petrov @@ -156,6 +156,38 @@ Description: ======= Changes ======= + 1.10 (2014-12-14) + +++++++++++++++++ + + * Disabled SSLv3. (Issue #473) + + * Add ``Url.url`` property to return the composed url string. (Issue #394) + + * Fixed PyOpenSSL + gevent ``WantWriteError``. (Issue #412) + + * ``MaxRetryError.reason`` will always be an exception, not string. + (Issue #481) + + * Fixed SSL-related timeouts not being detected as timeouts. (Issue #492) + + * Py3: Use ``ssl.create_default_context()`` when available. (Issue #473) + + * Emit ``InsecureRequestWarning`` for *every* insecure HTTPS request. + (Issue #496) + + * Emit ``SecurityWarning`` when certificate has no ``subjectAltName``. + (Issue #499) + + * Close and discard sockets which experienced SSL-related errors. + (Issue #501) + + * Handle ``body`` param in ``.request(...)``. (Issue #513) + + * Respect timeout with HTTPS proxy. (Issue #505) + + * PyOpenSSL: Handle ZeroReturnError exception. (Issue #520) + + 1.9.1 (2014-09-13) ++++++++++++++++++ diff --git a/dev-requirements.txt b/dev-requirements.txt index 8010704..2eb5875 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -2,6 +2,8 @@ nose==1.3.3 mock==1.0.1 coverage==3.7.1 tox==1.7.1 +twine==1.3.1 +wheel==0.24.0 # Tornado 3.2.2 makes our tests flaky, so we stick with 3.1 tornado==3.1.1 diff --git a/docs/security.rst b/docs/security.rst index 5321e24..0566737 100644 --- a/docs/security.rst +++ b/docs/security.rst @@ -147,7 +147,6 @@ Unverified HTTPS requests will trigger a warning:: urllib3/connectionpool.py:736: InsecureRequestWarning: Unverified HTTPS request is being made. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.org/en/latest/security.html - (This warning will only appear once by default.) This would be a great time to enable HTTPS verification: :ref:`certifi-with-urllib3`. 
diff --git a/dummyserver/__init__.pyc b/dummyserver/__init__.pyc deleted file mode 100644 index 24e9f56..0000000 Binary files a/dummyserver/__init__.pyc and /dev/null differ diff --git a/dummyserver/certs/README.rst b/dummyserver/certs/README.rst new file mode 100644 index 0000000..4fb6632 --- /dev/null +++ b/dummyserver/certs/README.rst @@ -0,0 +1,24 @@ +Creating a new SAN-less CRT +--------------------------- + +(Instructions lifted from Heroku_) + +1. Generate a new CSR:: + + openssl req -new -key server.key -out server.new.csr -nodes -days 10957 + +2. Generate a new CRT:: + + openssl x509 -req -in server.new.csr -signkey server.key -out server.new.crt -days 10957 + +Creating a new PEM file with your new CRT +----------------------------------------- + +1. Concatenate the ``crt`` and ``key`` files into one:: + + cat server.new.crt server.key > cacert.new.pem + + +:Last Modified: 1 Nov 2014 + +.. _Heroku: https://devcenter.heroku.com/articles/ssl-certificate-self diff --git a/dummyserver/certs/cacert.no_san.pem b/dummyserver/certs/cacert.no_san.pem new file mode 100644 index 0000000..6df351b --- /dev/null +++ b/dummyserver/certs/cacert.no_san.pem @@ -0,0 +1,31 @@ +-----BEGIN CERTIFICATE----- +MIIChzCCAfACCQCmk6is+6REjDANBgkqhkiG9w0BAQUFADCBhzELMAkGA1UEBhMC +Q0ExEDAOBgNVBAgMB09udGFyaW8xEDAOBgNVBAcMB09udGFyaW8xHzAdBgNVBAoM +FlNoYXpvdydzIFVzZWQgQ2FycyBJbmMxEjAQBgNVBAMMCWxvY2FsaG9zdDEfMB0G +CSqGSIb3DQEJARYQc2hhem93QGdtYWlsLmNvbTAeFw0xNDEyMDMyMjE3MjVaFw00 +NDEyMDIyMjE3MjVaMIGHMQswCQYDVQQGEwJDQTEQMA4GA1UECAwHT250YXJpbzEQ +MA4GA1UEBwwHT250YXJpbzEfMB0GA1UECgwWU2hhem93J3MgVXNlZCBDYXJzIElu +YzESMBAGA1UEAwwJbG9jYWxob3N0MR8wHQYJKoZIhvcNAQkBFhBzaGF6b3dAZ21h +aWwuY29tMIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQDXe3FqmCWvP8XPxqtT ++0bfL1Tvzvebi46k0WIcUV8bP3vyYiSRXG9ALmyzZH4GHY9UVs4OEDkCMDOBSezB +0y9ai/9doTNcaictdEBu8nfdXKoTtzrn+VX4UPrkH5hm7NQ1fTQuj1MR7yBCmYqN +3Q2Q+Efuujyx0FwBzAuy1aKYuwIDAQABMA0GCSqGSIb3DQEBBQUAA4GBAHI/m9/O 
+bVR3zBOJZUKlHzTRvfYbYhhfrrcQlbwhjKqNyZcQTL/bJdtQSL19g3ftC5wZPI+y +66R24MqGmRcv5kT32HcuIK1Xhx4nDqTqnTNvGkaIh5CqS4DEP+iqtwDoEbQt8DwL +ejKtvZlyQRKFPTMtmv4VsTIHeVOAj+pXn595 +-----END CERTIFICATE----- +-----BEGIN RSA PRIVATE KEY----- +MIICXgIBAAKBgQDXe3FqmCWvP8XPxqtT+0bfL1Tvzvebi46k0WIcUV8bP3vyYiSR +XG9ALmyzZH4GHY9UVs4OEDkCMDOBSezB0y9ai/9doTNcaictdEBu8nfdXKoTtzrn ++VX4UPrkH5hm7NQ1fTQuj1MR7yBCmYqN3Q2Q+Efuujyx0FwBzAuy1aKYuwIDAQAB +AoGBANOGBM6bbhq7ImYU4qf8+RQrdVg2tc9Fzo+yTnn30sF/rx8/AiCDOV4qdGAh +HKjKKaGj2H/rotqoEFcxBy05LrgJXxydBP72e9PYhNgKOcSmCQu4yALIPEXfKuIM +zgAErHVJ2l79fif3D4hzNyz+u5E1A9n3FG9cgaJSiYP8IG2RAkEA82GZ8rBkSGQQ +ZQ3oFuzPAAL21lbj8D0p76fsCpvS7427DtZDOjhOIKZmaeykpv+qSzRraqEqjDRi +S4kjQvwh6QJBAOKniZ+NDo2lSpbOFk+XlmABK1DormVpj8KebHEZYok1lRI+WiX9 +Nnoe9YLgix7++6H5SBBCcTB4HvM+5A4BuwMCQQChcX/eZbXP81iQwB3Rfzp8xnqY +icDf7qKvz9Ma4myU7Y5E9EpaB1mD/P14jDpYcMW050vNyqTfpiwB8TFL0NZpAkEA +02jkFH9UyMgZV6qo4tqI98l/ZrtyF8OrxSNSEPhVkZf6EQc5vN9/lc8Uv1vESEgb +3AwRrKDcxRH2BHtv6qSwkwJAGjqnkIcEkA75r1e55/EF2chcZW1+tpwKupE8CtAH +VXGd5DVwt4cYWkLUj2gF2fJbV97uu2MAg5CFDb+vQ6p5eA== +-----END RSA PRIVATE KEY----- diff --git a/dummyserver/certs/server.no_san.crt b/dummyserver/certs/server.no_san.crt new file mode 100644 index 0000000..cb89a14 --- /dev/null +++ b/dummyserver/certs/server.no_san.crt @@ -0,0 +1,16 @@ +-----BEGIN CERTIFICATE----- +MIIChzCCAfACCQCmk6is+6REjDANBgkqhkiG9w0BAQUFADCBhzELMAkGA1UEBhMC +Q0ExEDAOBgNVBAgMB09udGFyaW8xEDAOBgNVBAcMB09udGFyaW8xHzAdBgNVBAoM +FlNoYXpvdydzIFVzZWQgQ2FycyBJbmMxEjAQBgNVBAMMCWxvY2FsaG9zdDEfMB0G +CSqGSIb3DQEJARYQc2hhem93QGdtYWlsLmNvbTAeFw0xNDEyMDMyMjE3MjVaFw00 +NDEyMDIyMjE3MjVaMIGHMQswCQYDVQQGEwJDQTEQMA4GA1UECAwHT250YXJpbzEQ +MA4GA1UEBwwHT250YXJpbzEfMB0GA1UECgwWU2hhem93J3MgVXNlZCBDYXJzIElu +YzESMBAGA1UEAwwJbG9jYWxob3N0MR8wHQYJKoZIhvcNAQkBFhBzaGF6b3dAZ21h +aWwuY29tMIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQDXe3FqmCWvP8XPxqtT ++0bfL1Tvzvebi46k0WIcUV8bP3vyYiSRXG9ALmyzZH4GHY9UVs4OEDkCMDOBSezB +0y9ai/9doTNcaictdEBu8nfdXKoTtzrn+VX4UPrkH5hm7NQ1fTQuj1MR7yBCmYqN 
+3Q2Q+Efuujyx0FwBzAuy1aKYuwIDAQABMA0GCSqGSIb3DQEBBQUAA4GBAHI/m9/O +bVR3zBOJZUKlHzTRvfYbYhhfrrcQlbwhjKqNyZcQTL/bJdtQSL19g3ftC5wZPI+y +66R24MqGmRcv5kT32HcuIK1Xhx4nDqTqnTNvGkaIh5CqS4DEP+iqtwDoEbQt8DwL +ejKtvZlyQRKFPTMtmv4VsTIHeVOAj+pXn595 +-----END CERTIFICATE----- diff --git a/dummyserver/certs/server.no_san.csr b/dummyserver/certs/server.no_san.csr new file mode 100644 index 0000000..d4bb7c3 --- /dev/null +++ b/dummyserver/certs/server.no_san.csr @@ -0,0 +1,12 @@ +-----BEGIN CERTIFICATE REQUEST----- +MIIByDCCATECAQAwgYcxCzAJBgNVBAYTAkNBMRAwDgYDVQQIDAdPbnRhcmlvMRAw +DgYDVQQHDAdPbnRhcmlvMR8wHQYDVQQKDBZTaGF6b3cncyBVc2VkIENhcnMgSW5j +MRIwEAYDVQQDDAlsb2NhbGhvc3QxHzAdBgkqhkiG9w0BCQEWEHNoYXpvd0BnbWFp +bC5jb20wgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBANd7cWqYJa8/xc/Gq1P7 +Rt8vVO/O95uLjqTRYhxRXxs/e/JiJJFcb0AubLNkfgYdj1RWzg4QOQIwM4FJ7MHT +L1qL/12hM1xqJy10QG7yd91cqhO3Ouf5VfhQ+uQfmGbs1DV9NC6PUxHvIEKZio3d +DZD4R+66PLHQXAHMC7LVopi7AgMBAAGgADANBgkqhkiG9w0BAQUFAAOBgQDGWkxr +mCa2h+/HnptucimU+T4QESBNc3fHhnnWaj4RXJaS0xwUDaG81INnxj6KNVgOtemK +VlwG7Ziqj1i+gZ1UpbmMp1YkSD/0+N8vb2BStuXlc5rP0+cG1DlzV1Dc+FaDHHsy +7MfyeHTa5FYdSeKsiAFHlQ84g08Pd7hW0c+SxA== +-----END CERTIFICATE REQUEST----- diff --git a/dummyserver/handlers.pyc b/dummyserver/handlers.pyc deleted file mode 100644 index 22aedc3..0000000 Binary files a/dummyserver/handlers.pyc and /dev/null differ diff --git a/dummyserver/proxy.pyc b/dummyserver/proxy.pyc deleted file mode 100644 index 23fa01d..0000000 Binary files a/dummyserver/proxy.pyc and /dev/null differ diff --git a/dummyserver/server.py b/dummyserver/server.py index 99f0835..6ee9a5d 100755 --- a/dummyserver/server.py +++ b/dummyserver/server.py @@ -28,8 +28,13 @@ DEFAULT_CERTS = { 'certfile': os.path.join(CERTS_PATH, 'server.crt'), 'keyfile': os.path.join(CERTS_PATH, 'server.key'), } +NO_SAN_CERTS = { + 'certfile': os.path.join(CERTS_PATH, 'server.no_san.crt'), + 'keyfile': DEFAULT_CERTS['keyfile'] +} DEFAULT_CA = os.path.join(CERTS_PATH, 'cacert.pem') DEFAULT_CA_BAD = 
os.path.join(CERTS_PATH, 'client_bad.pem') +NO_SAN_CA = os.path.join(CERTS_PATH, 'cacert.no_san.pem') # Different types of servers we have: @@ -179,3 +184,17 @@ def get_unreachable_address(): return sockaddr else: s.close() + + +if __name__ == '__main__': + # For debugging dummyserver itself - python -m dummyserver.server + from .testcase import TestingApp + host = '127.0.0.1' + + io_loop = tornado.ioloop.IOLoop() + app = tornado.wsgi.WSGIContainer(TestingApp()) + server, port = run_tornado_app(app, io_loop, None, + 'http', host) + server_thread = run_loop_in_thread(io_loop) + + print("Listening on http://{host}:{port}".format(host=host, port=port)) diff --git a/dummyserver/server.pyc b/dummyserver/server.pyc deleted file mode 100644 index b997d0e..0000000 Binary files a/dummyserver/server.pyc and /dev/null differ diff --git a/dummyserver/testcase.pyc b/dummyserver/testcase.pyc deleted file mode 100644 index 29cc06a..0000000 Binary files a/dummyserver/testcase.pyc and /dev/null differ diff --git a/test/__init__.pyc b/test/__init__.pyc deleted file mode 100644 index 38b9317..0000000 Binary files a/test/__init__.pyc and /dev/null differ diff --git a/test/contrib/__init__.pyc b/test/contrib/__init__.pyc deleted file mode 100644 index 2d2fd5d..0000000 Binary files a/test/contrib/__init__.pyc and /dev/null differ diff --git a/test/contrib/test_pyopenssl.pyc b/test/contrib/test_pyopenssl.pyc deleted file mode 100644 index 6441273..0000000 Binary files a/test/contrib/test_pyopenssl.pyc and /dev/null differ diff --git a/test/port_helpers.pyc b/test/port_helpers.pyc deleted file mode 100644 index 7a1c425..0000000 Binary files a/test/port_helpers.pyc and /dev/null differ diff --git a/test/test_collections.pyc b/test/test_collections.pyc deleted file mode 100644 index d1ecd73..0000000 Binary files a/test/test_collections.pyc and /dev/null differ diff --git a/test/test_compatibility.pyc b/test/test_compatibility.pyc deleted file mode 100644 index 2dfdf75..0000000 Binary files 
a/test/test_compatibility.pyc and /dev/null differ diff --git a/test/test_connectionpool.py b/test/test_connectionpool.py index 28fb89b..a6dbcf4 100644 --- a/test/test_connectionpool.py +++ b/test/test_connectionpool.py @@ -118,7 +118,7 @@ class TestConnectionPool(unittest.TestCase): str(MaxRetryError( HTTPConnectionPool(host='localhost'), "Test.", None)), "HTTPConnectionPool(host='localhost', port=None): " - "Max retries exceeded with url: Test. (Caused by redirect)") + "Max retries exceeded with url: Test. (Caused by None)") err = SocketError("Test") diff --git a/test/test_connectionpool.pyc b/test/test_connectionpool.pyc deleted file mode 100644 index e87a3b3..0000000 Binary files a/test/test_connectionpool.pyc and /dev/null differ diff --git a/test/test_exceptions.pyc b/test/test_exceptions.pyc deleted file mode 100644 index 3274e34..0000000 Binary files a/test/test_exceptions.pyc and /dev/null differ diff --git a/test/test_fields.pyc b/test/test_fields.pyc deleted file mode 100644 index 4622899..0000000 Binary files a/test/test_fields.pyc and /dev/null differ diff --git a/test/test_filepost.pyc b/test/test_filepost.pyc deleted file mode 100644 index ec54472..0000000 Binary files a/test/test_filepost.pyc and /dev/null differ diff --git a/test/test_poolmanager.pyc b/test/test_poolmanager.pyc deleted file mode 100644 index 077c2ac..0000000 Binary files a/test/test_poolmanager.pyc and /dev/null differ diff --git a/test/test_proxymanager.pyc b/test/test_proxymanager.pyc deleted file mode 100644 index 3696ee8..0000000 Binary files a/test/test_proxymanager.pyc and /dev/null differ diff --git a/test/test_response.pyc b/test/test_response.pyc deleted file mode 100644 index 99e5c0e..0000000 Binary files a/test/test_response.pyc and /dev/null differ diff --git a/test/test_retry.py b/test/test_retry.py index 7a3aa40..421e508 100644 --- a/test/test_retry.py +++ b/test/test_retry.py @@ -1,11 +1,13 @@ import unittest +from urllib3.response import HTTPResponse from 
urllib3.packages.six.moves import xrange from urllib3.util.retry import Retry from urllib3.exceptions import ( ConnectTimeoutError, + MaxRetryError, ReadTimeoutError, - MaxRetryError + ResponseError, ) @@ -154,3 +156,43 @@ class RetryTest(unittest.TestCase): def test_disabled(self): self.assertRaises(MaxRetryError, Retry(-1).increment) self.assertRaises(MaxRetryError, Retry(0).increment) + + def test_error_message(self): + retry = Retry(total=0) + try: + retry = retry.increment(error=ReadTimeoutError(None, "/", "read timed out")) + raise AssertionError("Should have raised a MaxRetryError") + except MaxRetryError as e: + assert 'Caused by redirect' not in str(e) + self.assertEqual(str(e.reason), 'None: read timed out') + + retry = Retry(total=1) + try: + retry = retry.increment('POST', '/') + retry = retry.increment('POST', '/') + raise AssertionError("Should have raised a MaxRetryError") + except MaxRetryError as e: + assert 'Caused by redirect' not in str(e) + self.assertTrue(isinstance(e.reason, ResponseError), + "%s should be a ResponseError" % e.reason) + self.assertEqual(str(e.reason), ResponseError.GENERIC_ERROR) + + retry = Retry(total=1) + try: + response = HTTPResponse(status=500) + retry = retry.increment('POST', '/', response=response) + retry = retry.increment('POST', '/', response=response) + raise AssertionError("Should have raised a MaxRetryError") + except MaxRetryError as e: + assert 'Caused by redirect' not in str(e) + msg = ResponseError.SPECIFIC_ERROR.format(status_code=500) + self.assertEqual(str(e.reason), msg) + + retry = Retry(connect=1) + try: + retry = retry.increment(error=ConnectTimeoutError('conntimeout')) + retry = retry.increment(error=ConnectTimeoutError('conntimeout')) + raise AssertionError("Should have raised a MaxRetryError") + except MaxRetryError as e: + assert 'Caused by redirect' not in str(e) + self.assertEqual(str(e.reason), 'conntimeout') diff --git a/test/test_retry.pyc b/test/test_retry.pyc deleted file mode 100644 index 
398c010..0000000 Binary files a/test/test_retry.pyc and /dev/null differ diff --git a/test/test_util.py b/test/test_util.py index 1811dbd..c850d91 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -2,8 +2,9 @@ import warnings import logging import unittest import ssl +from itertools import chain -from mock import patch +from mock import patch, Mock from urllib3 import add_stderr_logger, disable_warnings from urllib3.util.request import make_headers @@ -14,14 +15,15 @@ from urllib3.util.url import ( split_first, Url, ) -from urllib3.util.ssl_ import resolve_cert_reqs +from urllib3.util.ssl_ import resolve_cert_reqs, ssl_wrap_socket from urllib3.exceptions import ( LocationParseError, TimeoutStateError, InsecureRequestWarning, + SSLError, ) -from urllib3.util import is_fp_closed +from urllib3.util import is_fp_closed, ssl_ from . import clear_warnings @@ -89,45 +91,61 @@ class TestUtil(unittest.TestCase): self.assertRaises(LocationParseError, get_host, location) - def test_parse_url(self): - url_host_map = { - 'http://google.com/mail': Url('http', host='google.com', path='/mail'), - 'http://google.com/mail/': Url('http', host='google.com', path='/mail/'), - 'google.com/mail': Url(host='google.com', path='/mail'), - 'http://google.com/': Url('http', host='google.com', path='/'), - 'http://google.com': Url('http', host='google.com'), - 'http://google.com?foo': Url('http', host='google.com', path='', query='foo'), - - # Path/query/fragment - '': Url(), - '/': Url(path='/'), - '?': Url(path='', query=''), - '#': Url(path='', fragment=''), - '#?/!google.com/?foo#bar': Url(path='', fragment='?/!google.com/?foo#bar'), - '/foo': Url(path='/foo'), - '/foo?bar=baz': Url(path='/foo', query='bar=baz'), - '/foo?bar=baz#banana?apple/orange': Url(path='/foo', query='bar=baz', fragment='banana?apple/orange'), - - # Port - 'http://google.com/': Url('http', host='google.com', path='/'), - 'http://google.com:80/': Url('http', host='google.com', port=80, path='/'), - 
'http://google.com:/': Url('http', host='google.com', path='/'), - 'http://google.com:80': Url('http', host='google.com', port=80), - 'http://google.com:': Url('http', host='google.com'), - - # Auth - 'http://foo:bar@localhost/': Url('http', auth='foo:bar', host='localhost', path='/'), - 'http://foo@localhost/': Url('http', auth='foo', host='localhost', path='/'), - 'http://foo:bar@baz@localhost/': Url('http', auth='foo:bar@baz', host='localhost', path='/'), - 'http://@': Url('http', host=None, auth='') + parse_url_host_map = { + 'http://google.com/mail': Url('http', host='google.com', path='/mail'), + 'http://google.com/mail/': Url('http', host='google.com', path='/mail/'), + 'google.com/mail': Url(host='google.com', path='/mail'), + 'http://google.com/': Url('http', host='google.com', path='/'), + 'http://google.com': Url('http', host='google.com'), + 'http://google.com?foo': Url('http', host='google.com', path='', query='foo'), + + # Path/query/fragment + '': Url(), + '/': Url(path='/'), + '#?/!google.com/?foo#bar': Url(path='', fragment='?/!google.com/?foo#bar'), + '/foo': Url(path='/foo'), + '/foo?bar=baz': Url(path='/foo', query='bar=baz'), + '/foo?bar=baz#banana?apple/orange': Url(path='/foo', query='bar=baz', fragment='banana?apple/orange'), + + # Port + 'http://google.com/': Url('http', host='google.com', path='/'), + 'http://google.com:80/': Url('http', host='google.com', port=80, path='/'), + 'http://google.com:80': Url('http', host='google.com', port=80), + + # Auth + 'http://foo:bar@localhost/': Url('http', auth='foo:bar', host='localhost', path='/'), + 'http://foo@localhost/': Url('http', auth='foo', host='localhost', path='/'), + 'http://foo:bar@baz@localhost/': Url('http', auth='foo:bar@baz', host='localhost', path='/'), + 'http://@': Url('http', host=None, auth='') + } + + non_round_tripping_parse_url_host_map = { + # Path/query/fragment + '?': Url(path='', query=''), + '#': Url(path='', fragment=''), + + # Empty Port + 'http://google.com:': 
Url('http', host='google.com'), + 'http://google.com:/': Url('http', host='google.com', path='/'), + } - for url, expected_url in url_host_map.items(): - returned_url = parse_url(url) - self.assertEqual(returned_url, expected_url) + + def test_parse_url(self): + for url, expected_Url in chain(self.parse_url_host_map.items(), self.non_round_tripping_parse_url_host_map.items()): + returned_Url = parse_url(url) + self.assertEqual(returned_Url, expected_Url) + + def test_unparse_url(self): + for url, expected_Url in self.parse_url_host_map.items(): + self.assertEqual(url, expected_Url.url) def test_parse_url_invalid_IPv6(self): self.assertRaises(ValueError, parse_url, '[::1') + def test_Url_str(self): + U = Url('http', host='google.com') + self.assertEqual(str(U), U.url) + def test_request_uri(self): url_host_map = { 'http://google.com/mail': '/mail', @@ -333,7 +351,7 @@ class TestUtil(unittest.TestCase): return True self.assertTrue(is_fp_closed(ClosedFile())) - + def test_is_fp_closed_object_has_none_fp(self): class NoneFpFile(object): @property @@ -355,3 +373,30 @@ class TestUtil(unittest.TestCase): pass self.assertRaises(ValueError, is_fp_closed, NotReallyAFile()) + + def test_ssl_wrap_socket_loads_the_cert_chain(self): + socket = object() + mock_context = Mock() + ssl_wrap_socket(ssl_context=mock_context, sock=socket, + certfile='/path/to/certfile') + + mock_context.load_cert_chain.assert_called_once_with( + '/path/to/certfile', None) + + def test_ssl_wrap_socket_loads_verify_locations(self): + socket = object() + mock_context = Mock() + ssl_wrap_socket(ssl_context=mock_context, ca_certs='/path/to/pem', + sock=socket) + mock_context.load_verify_locations.assert_called_once_with( + '/path/to/pem') + + def test_ssl_wrap_socket_with_no_sni(self): + socket = object() + mock_context = Mock() + # Ugly preservation of original value + HAS_SNI = ssl_.HAS_SNI + ssl_.HAS_SNI = False + ssl_wrap_socket(ssl_context=mock_context, sock=socket) + 
mock_context.wrap_socket.assert_called_once_with(socket) + ssl_.HAS_SNI = HAS_SNI diff --git a/test/test_util.pyc b/test/test_util.pyc deleted file mode 100644 index 0500c3b..0000000 Binary files a/test/test_util.pyc and /dev/null differ diff --git a/test/with_dummyserver/__init__.pyc b/test/with_dummyserver/__init__.pyc deleted file mode 100644 index 833be60..0000000 Binary files a/test/with_dummyserver/__init__.pyc and /dev/null differ diff --git a/test/with_dummyserver/test_connectionpool.py b/test/with_dummyserver/test_connectionpool.py index 7d54fbf..cc0f011 100644 --- a/test/with_dummyserver/test_connectionpool.py +++ b/test/with_dummyserver/test_connectionpool.py @@ -13,8 +13,7 @@ except: from urllib import urlencode from .. import ( - requires_network, - onlyPy3, onlyPy27OrNewer, onlyPy26OrOlder, + requires_network, onlyPy3, onlyPy26OrOlder, TARPIT_HOST, VALID_SOURCE_ADDRESSES, INVALID_SOURCE_ADDRESSES, ) from ..port_helpers import find_unused_port @@ -99,6 +98,13 @@ class TestConnectionPool(HTTPDummyServerTestCase): r = self.pool.request('POST', '/echo', fields=fields) self.assertEqual(r.data.count(b'name="foo"'), 2) + def test_request_method_body(self): + body = b'hi' + r = self.pool.request('POST', '/echo', body=body) + self.assertEqual(r.data, body) + + fields = [('hi', 'hello')] + self.assertRaises(TypeError, self.pool.request, 'POST', '/echo', body=body, fields=fields) def test_unicode_upload(self): fieldname = u('myfile') @@ -189,7 +195,7 @@ class TestConnectionPool(HTTPDummyServerTestCase): @timed(0.5) def test_timeout(self): """ Requests should time out when expected """ - url = '/sleep?seconds=0.002' + url = '/sleep?seconds=0.003' timeout = Timeout(read=0.001) # Pool-global timeout diff --git a/test/with_dummyserver/test_connectionpool.pyc b/test/with_dummyserver/test_connectionpool.pyc deleted file mode 100644 index b8c38e9..0000000 Binary files a/test/with_dummyserver/test_connectionpool.pyc and /dev/null differ diff --git 
a/test/with_dummyserver/test_https.py b/test/with_dummyserver/test_https.py index cf3eee7..16ca589 100644 --- a/test/with_dummyserver/test_https.py +++ b/test/with_dummyserver/test_https.py @@ -9,7 +9,8 @@ import mock from nose.plugins.skip import SkipTest from dummyserver.testcase import HTTPSDummyServerTestCase -from dummyserver.server import DEFAULT_CA, DEFAULT_CA_BAD, DEFAULT_CERTS +from dummyserver.server import (DEFAULT_CA, DEFAULT_CA_BAD, DEFAULT_CERTS, + NO_SAN_CERTS, NO_SAN_CA) from test import ( onlyPy26OrOlder, @@ -168,7 +169,7 @@ class TestHTTPS(HTTPSDummyServerTestCase): https_pool.request('HEAD', '/') def test_assert_hostname_false(self): - https_pool = HTTPSConnectionPool('127.0.0.1', self.port, + https_pool = HTTPSConnectionPool('localhost', self.port, cert_reqs='CERT_REQUIRED', ca_certs=DEFAULT_CA) @@ -176,7 +177,7 @@ class TestHTTPS(HTTPSDummyServerTestCase): https_pool.request('GET', '/') def test_assert_specific_hostname(self): - https_pool = HTTPSConnectionPool('127.0.0.1', self.port, + https_pool = HTTPSConnectionPool('localhost', self.port, cert_reqs='CERT_REQUIRED', ca_certs=DEFAULT_CA) @@ -184,7 +185,7 @@ class TestHTTPS(HTTPSDummyServerTestCase): https_pool.request('GET', '/') def test_assert_fingerprint_md5(self): - https_pool = HTTPSConnectionPool('127.0.0.1', self.port, + https_pool = HTTPSConnectionPool('localhost', self.port, cert_reqs='CERT_REQUIRED', ca_certs=DEFAULT_CA) @@ -193,7 +194,7 @@ class TestHTTPS(HTTPSDummyServerTestCase): https_pool.request('GET', '/') def test_assert_fingerprint_sha1(self): - https_pool = HTTPSConnectionPool('127.0.0.1', self.port, + https_pool = HTTPSConnectionPool('localhost', self.port, cert_reqs='CERT_REQUIRED', ca_certs=DEFAULT_CA) @@ -329,6 +330,8 @@ class TestHTTPS(HTTPSDummyServerTestCase): https_pool._make_request(conn, 'GET', '/') def test_ssl_correct_system_time(self): + self._pool.cert_reqs = 'CERT_REQUIRED' + self._pool.ca_certs = DEFAULT_CA with warnings.catch_warnings(record=True) as w: 
warnings.simplefilter('always') self._pool.request('GET', '/') @@ -336,6 +339,8 @@ class TestHTTPS(HTTPSDummyServerTestCase): self.assertEqual([], w) def test_ssl_wrong_system_time(self): + self._pool.cert_reqs = 'CERT_REQUIRED' + self._pool.ca_certs = DEFAULT_CA with mock.patch('urllib3.connection.datetime') as mock_date: mock_date.date.today.return_value = datetime.date(1970, 1, 1) @@ -369,6 +374,27 @@ class TestHTTPS_TLSv1(HTTPSDummyServerTestCase): self._pool.ssl_version = 'SSLv3' self.assertRaises(SSLError, self._pool.request, 'GET', '/') + def test_discards_connection_on_sslerror(self): + self._pool.cert_reqs = 'CERT_REQUIRED' + self.assertRaises(SSLError, self._pool.request, 'GET', '/') + self._pool.ca_certs = DEFAULT_CA + self._pool.request('GET', '/') + + +class TestHTTPS_NoSAN(HTTPSDummyServerTestCase): + certs = NO_SAN_CERTS + + def test_warning_for_certs_without_a_san(self): + """Ensure that a warning is raised when the cert from the server has + no Subject Alternative Name.""" + with mock.patch('warnings.warn') as warn: + https_pool = HTTPSConnectionPool(self.host, self.port, + cert_reqs='CERT_REQUIRED', + ca_certs=NO_SAN_CA) + r = https_pool.request('GET', '/') + self.assertEqual(r.status, 200) + self.assertTrue(warn.called) + if __name__ == '__main__': unittest.main() diff --git a/test/with_dummyserver/test_https.pyc b/test/with_dummyserver/test_https.pyc deleted file mode 100644 index 6d85316..0000000 Binary files a/test/with_dummyserver/test_https.pyc and /dev/null differ diff --git a/test/with_dummyserver/test_poolmanager.pyc b/test/with_dummyserver/test_poolmanager.pyc deleted file mode 100644 index 26c52e9..0000000 Binary files a/test/with_dummyserver/test_poolmanager.pyc and /dev/null differ diff --git a/test/with_dummyserver/test_proxy_poolmanager.py b/test/with_dummyserver/test_proxy_poolmanager.py index 61eedf1..df300fe 100644 --- a/test/with_dummyserver/test_proxy_poolmanager.py +++ b/test/with_dummyserver/test_proxy_poolmanager.py @@ -1,13 
+1,17 @@ -import unittest import json import socket +import unittest + +from nose.tools import timed from dummyserver.testcase import HTTPDummyProxyTestCase from dummyserver.server import ( DEFAULT_CA, DEFAULT_CA_BAD, get_unreachable_address) +from .. import TARPIT_HOST from urllib3.poolmanager import proxy_from_url, ProxyManager -from urllib3.exceptions import MaxRetryError, SSLError, ProxyError +from urllib3.exceptions import ( + MaxRetryError, SSLError, ProxyError, ConnectTimeoutError) from urllib3.connectionpool import connection_from_url, VerifiedHTTPSConnection @@ -259,5 +263,25 @@ class TestHTTPProxyManager(HTTPDummyProxyTestCase): self.assertEqual(sc3,sc4) + @timed(0.5) + def test_https_proxy_timeout(self): + https = proxy_from_url('https://{host}'.format(host=TARPIT_HOST)) + try: + https.request('GET', self.http_url, timeout=0.001) + self.fail("Failed to raise retry error.") + except MaxRetryError as e: + assert isinstance(e.reason, ConnectTimeoutError) + + + @timed(0.5) + def test_https_proxy_pool_timeout(self): + https = proxy_from_url('https://{host}'.format(host=TARPIT_HOST), + timeout=0.001) + try: + https.request('GET', self.http_url) + self.fail("Failed to raise retry error.") + except MaxRetryError as e: + assert isinstance(e.reason, ConnectTimeoutError) + if __name__ == '__main__': unittest.main() diff --git a/test/with_dummyserver/test_proxy_poolmanager.pyc b/test/with_dummyserver/test_proxy_poolmanager.pyc deleted file mode 100644 index 12c320c..0000000 Binary files a/test/with_dummyserver/test_proxy_poolmanager.pyc and /dev/null differ diff --git a/test/with_dummyserver/test_socketlevel.py b/test/with_dummyserver/test_socketlevel.py index e1ac1c6..c1ef1be 100644 --- a/test/with_dummyserver/test_socketlevel.py +++ b/test/with_dummyserver/test_socketlevel.py @@ -137,6 +137,24 @@ class TestSocketClosing(SocketDummyServerTestCase): finally: timed_out.set() + def test_https_connection_read_timeout(self): + """ Handshake timeouts should fail with a 
Timeout""" + timed_out = Event() + def socket_handler(listener): + sock = listener.accept()[0] + while not sock.recv(65536): + pass + + timed_out.wait() + sock.close() + + self._start_server(socket_handler) + pool = HTTPSConnectionPool(self.host, self.port, timeout=0.001, retries=False) + try: + self.assertRaises(ReadTimeoutError, pool.request, 'GET', '/') + finally: + timed_out.set() + def test_timeout_errors_cause_retries(self): def socket_handler(listener): sock_timeout = listener.accept()[0] diff --git a/test/with_dummyserver/test_socketlevel.pyc b/test/with_dummyserver/test_socketlevel.pyc deleted file mode 100644 index ba3b19e..0000000 Binary files a/test/with_dummyserver/test_socketlevel.pyc and /dev/null differ diff --git a/urllib3.egg-info/PKG-INFO b/urllib3.egg-info/PKG-INFO index 964cd4b..7b5cf18 100644 --- a/urllib3.egg-info/PKG-INFO +++ b/urllib3.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 1.1 Name: urllib3 -Version: 1.9.1 +Version: 1.10 Summary: HTTP library with thread-safe connection pooling, file post, and more. Home-page: http://urllib3.readthedocs.org/ Author: Andrey Petrov @@ -156,6 +156,38 @@ Description: ======= Changes ======= + 1.10 (2014-12-14) + +++++++++++++++++ + + * Disabled SSLv3. (Issue #473) + + * Add ``Url.url`` property to return the composed url string. (Issue #394) + + * Fixed PyOpenSSL + gevent ``WantWriteError``. (Issue #412) + + * ``MaxRetryError.reason`` will always be an exception, not string. + (Issue #481) + + * Fixed SSL-related timeouts not being detected as timeouts. (Issue #492) + + * Py3: Use ``ssl.create_default_context()`` when available. (Issue #473) + + * Emit ``InsecureRequestWarning`` for *every* insecure HTTPS request. + (Issue #496) + + * Emit ``SecurityWarning`` when certificate has no ``subjectAltName``. + (Issue #499) + + * Close and discard sockets which experienced SSL-related errors. + (Issue #501) + + * Handle ``body`` param in ``.request(...)``. 
(Issue #513) + + * Respect timeout with HTTPS proxy. (Issue #505) + + * PyOpenSSL: Handle ZeroReturnError exception. (Issue #520) + + 1.9.1 (2014-09-13) ++++++++++++++++++ diff --git a/urllib3.egg-info/SOURCES.txt b/urllib3.egg-info/SOURCES.txt index 2f0b5fc..6cb0fcf 100644 --- a/urllib3.egg-info/SOURCES.txt +++ b/urllib3.egg-info/SOURCES.txt @@ -21,16 +21,13 @@ docs/managers.rst docs/pools.rst docs/security.rst dummyserver/__init__.py -dummyserver/__init__.pyc dummyserver/handlers.py -dummyserver/handlers.pyc dummyserver/proxy.py -dummyserver/proxy.pyc dummyserver/server.py -dummyserver/server.pyc dummyserver/testcase.py -dummyserver/testcase.pyc +dummyserver/certs/README.rst dummyserver/certs/cacert.key +dummyserver/certs/cacert.no_san.pem dummyserver/certs/cacert.pem dummyserver/certs/client.csr dummyserver/certs/client.key @@ -40,49 +37,30 @@ dummyserver/certs/server.crt dummyserver/certs/server.csr dummyserver/certs/server.key dummyserver/certs/server.key.org +dummyserver/certs/server.no_san.crt +dummyserver/certs/server.no_san.csr test/__init__.py -test/__init__.pyc test/benchmark.py test/port_helpers.py -test/port_helpers.pyc test/test_collections.py -test/test_collections.pyc test/test_compatibility.py -test/test_compatibility.pyc test/test_connectionpool.py -test/test_connectionpool.pyc test/test_exceptions.py -test/test_exceptions.pyc test/test_fields.py -test/test_fields.pyc test/test_filepost.py -test/test_filepost.pyc test/test_poolmanager.py -test/test_poolmanager.pyc test/test_proxymanager.py -test/test_proxymanager.pyc test/test_response.py -test/test_response.pyc test/test_retry.py -test/test_retry.pyc test/test_util.py -test/test_util.pyc test/contrib/__init__.py -test/contrib/__init__.pyc test/contrib/test_pyopenssl.py -test/contrib/test_pyopenssl.pyc test/with_dummyserver/__init__.py -test/with_dummyserver/__init__.pyc test/with_dummyserver/test_connectionpool.py -test/with_dummyserver/test_connectionpool.pyc test/with_dummyserver/test_https.py 
-test/with_dummyserver/test_https.pyc test/with_dummyserver/test_poolmanager.py -test/with_dummyserver/test_poolmanager.pyc test/with_dummyserver/test_proxy_poolmanager.py -test/with_dummyserver/test_proxy_poolmanager.pyc test/with_dummyserver/test_socketlevel.py -test/with_dummyserver/test_socketlevel.pyc urllib3/__init__.py urllib3/_collections.py urllib3/connection.py diff --git a/urllib3/__init__.py b/urllib3/__init__.py index 3546d13..4f9d4a7 100644 --- a/urllib3/__init__.py +++ b/urllib3/__init__.py @@ -4,7 +4,7 @@ urllib3 - Thread-safe connection pooling and re-using. __author__ = 'Andrey Petrov (andrey.petrov@shazow.net)' __license__ = 'MIT' -__version__ = '1.9.1' +__version__ = '1.10' from .connectionpool import ( @@ -55,9 +55,9 @@ def add_stderr_logger(level=logging.DEBUG): del NullHandler -# Set security warning to only go off once by default. +# Set security warning to always go off by default. import warnings -warnings.simplefilter('module', exceptions.SecurityWarning) +warnings.simplefilter('always', exceptions.SecurityWarning) def disable_warnings(category=exceptions.HTTPWarning): """ diff --git a/urllib3/_collections.py b/urllib3/_collections.py index d77ebb8..784342a 100644 --- a/urllib3/_collections.py +++ b/urllib3/_collections.py @@ -14,7 +14,7 @@ try: # Python 2.7+ from collections import OrderedDict except ImportError: from .packages.ordered_dict import OrderedDict -from .packages.six import itervalues +from .packages.six import iterkeys, itervalues __all__ = ['RecentlyUsedContainer', 'HTTPHeaderDict'] @@ -85,8 +85,7 @@ class RecentlyUsedContainer(MutableMapping): def clear(self): with self.lock: # Copy pointers to all values, then wipe the mapping - # under Python 2, this copies the list of values twice :-| - values = list(self._container.values()) + values = list(itervalues(self._container)) self._container.clear() if self.dispose_func: @@ -95,7 +94,7 @@ class RecentlyUsedContainer(MutableMapping): def keys(self): with self.lock: - return 
self._container.keys() + return list(iterkeys(self._container)) class HTTPHeaderDict(MutableMapping): diff --git a/urllib3/connection.py b/urllib3/connection.py index cebdd86..e5de769 100644 --- a/urllib3/connection.py +++ b/urllib3/connection.py @@ -38,6 +38,7 @@ except NameError: # Python 2: from .exceptions import ( ConnectTimeoutError, SystemTimeWarning, + SecurityWarning, ) from .packages.ssl_match_hostname import match_hostname @@ -241,8 +242,15 @@ class VerifiedHTTPSConnection(HTTPSConnection): self.assert_fingerprint) elif resolved_cert_reqs != ssl.CERT_NONE \ and self.assert_hostname is not False: - match_hostname(self.sock.getpeercert(), - self.assert_hostname or hostname) + cert = self.sock.getpeercert() + if not cert.get('subjectAltName', ()): + warnings.warn(( + 'Certificate has no `subjectAltName`, falling back to check for a `commonName` for now. ' + 'This feature is being removed by major browsers and deprecated by RFC 2818. ' + '(See https://github.com/shazow/urllib3/issues/497 for details.)'), + SecurityWarning + ) + match_hostname(cert, self.assert_hostname or hostname) self.is_verified = (resolved_cert_reqs == ssl.CERT_REQUIRED or self.assert_fingerprint is not None) diff --git a/urllib3/connectionpool.py b/urllib3/connectionpool.py index ac6e0ca..8bdf228 100644 --- a/urllib3/connectionpool.py +++ b/urllib3/connectionpool.py @@ -266,6 +266,10 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): """ pass + def _prepare_proxy(self, conn): + # Nothing to do for HTTP connections. + pass + def _get_timeout(self, timeout): """ Helper that always returns a :class:`urllib3.util.Timeout` """ if timeout is _Default: @@ -278,6 +282,23 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): # can be removed later return Timeout.from_float(timeout) + def _raise_timeout(self, err, url, timeout_value): + """Is the error actually a timeout? 
Will raise a ReadTimeout or pass""" + + if isinstance(err, SocketTimeout): + raise ReadTimeoutError(self, url, "Read timed out. (read timeout=%s)" % timeout_value) + + # See the above comment about EAGAIN in Python 3. In Python 2 we have + # to specifically catch it and throw the timeout error + if hasattr(err, 'errno') and err.errno in _blocking_errnos: + raise ReadTimeoutError(self, url, "Read timed out. (read timeout=%s)" % timeout_value) + + # Catch possible read timeouts thrown as SSL errors. If not the + # case, rethrow the original. We need to do this because of: + # http://bugs.python.org/issue10272 + if 'timed out' in str(err) or 'did not complete (read)' in str(err): # Python 2.6 + raise ReadTimeoutError(self, url, "Read timed out. (read timeout=%s)" % timeout_value) + def _make_request(self, conn, method, url, timeout=_Default, **httplib_request_kw): """ @@ -301,7 +322,12 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): conn.timeout = timeout_obj.connect_timeout # Trigger any extra validation we need to do. - self._validate_conn(conn) + try: + self._validate_conn(conn) + except (SocketTimeout, BaseSSLError) as e: + # Py2 raises this as a BaseSSLError, Py3 raises it as socket timeout. + self._raise_timeout(err=e, url=url, timeout_value=conn.timeout) + raise # conn.request() calls httplib.*.request, not the method in # urllib3.request. It also calls makefile (recv) on the socket. @@ -331,28 +357,8 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): httplib_response = conn.getresponse(buffering=True) except TypeError: # Python 2.6 and older httplib_response = conn.getresponse() - except SocketTimeout: - raise ReadTimeoutError( - self, url, "Read timed out. (read timeout=%s)" % read_timeout) - - except BaseSSLError as e: - # Catch possible read timeouts thrown as SSL errors. If not the - # case, rethrow the original. 
We need to do this because of: - # http://bugs.python.org/issue10272 - if 'timed out' in str(e) or \ - 'did not complete (read)' in str(e): # Python 2.6 - raise ReadTimeoutError( - self, url, "Read timed out. (read timeout=%s)" % read_timeout) - - raise - - except SocketError as e: # Platform-specific: Python 2 - # See the above comment about EAGAIN in Python 3. In Python 2 we - # have to specifically catch it and throw the timeout error - if e.errno in _blocking_errnos: - raise ReadTimeoutError( - self, url, "Read timed out. (read timeout=%s)" % read_timeout) - + except (SocketTimeout, BaseSSLError, SocketError) as e: + self._raise_timeout(err=e, url=url, timeout_value=read_timeout) raise # AppEngine doesn't have a version attr. @@ -508,11 +514,18 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): try: # Request a connection from the queue. + timeout_obj = self._get_timeout(timeout) conn = self._get_conn(timeout=pool_timeout) + conn.timeout = timeout_obj.connect_timeout + + is_new_proxy_conn = self.proxy is not None and not getattr(conn, 'sock', None) + if is_new_proxy_conn: + self._prepare_proxy(conn) + # Make the request on the httplib connection object. httplib_response = self._make_request(conn, method, url, - timeout=timeout, + timeout=timeout_obj, body=body, headers=headers) # If we're going to release the connection in ``finally:``, then @@ -537,9 +550,12 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): raise EmptyPoolError(self, "No pool connections are available.") except (BaseSSLError, CertificateError) as e: - # Release connection unconditionally because there is no way to - # close it externally in case of exception. - release_conn = True + # Close the connection. If a connection is reused on which there + # was a Certificate error, the next request will certainly raise + # another Certificate error. 
+ if conn: + conn.close() + conn = None raise SSLError(e) except (TimeoutError, HTTPException, SocketError, ConnectionError) as e: @@ -668,23 +684,25 @@ class HTTPSConnectionPool(HTTPConnectionPool): assert_fingerprint=self.assert_fingerprint) conn.ssl_version = self.ssl_version - if self.proxy is not None: - # Python 2.7+ - try: - set_tunnel = conn.set_tunnel - except AttributeError: # Platform-specific: Python 2.6 - set_tunnel = conn._set_tunnel + return conn - if sys.version_info <= (2, 6, 4) and not self.proxy_headers: # Python 2.6.4 and older - set_tunnel(self.host, self.port) - else: - set_tunnel(self.host, self.port, self.proxy_headers) + def _prepare_proxy(self, conn): + """ + Establish tunnel connection early, because otherwise httplib + would improperly set Host: header to proxy's IP:port. + """ + # Python 2.7+ + try: + set_tunnel = conn.set_tunnel + except AttributeError: # Platform-specific: Python 2.6 + set_tunnel = conn._set_tunnel - # Establish tunnel connection early, because otherwise httplib - # would improperly set Host: header to proxy's IP:port. - conn.connect() + if sys.version_info <= (2, 6, 4) and not self.proxy_headers: # Python 2.6.4 and older + set_tunnel(self.host, self.port) + else: + set_tunnel(self.host, self.port, self.proxy_headers) - return conn + conn.connect() def _new_conn(self): """ @@ -725,8 +743,7 @@ class HTTPSConnectionPool(HTTPConnectionPool): warnings.warn(( 'Unverified HTTPS request is being made. ' 'Adding certificate verification is strongly advised. 
See: ' - 'https://urllib3.readthedocs.org/en/latest/security.html ' - '(This warning will only appear once by default.)'), + 'https://urllib3.readthedocs.org/en/latest/security.html'), InsecureRequestWarning) diff --git a/urllib3/contrib/pyopenssl.py b/urllib3/contrib/pyopenssl.py index 8475eeb..ee657fb 100644 --- a/urllib3/contrib/pyopenssl.py +++ b/urllib3/contrib/pyopenssl.py @@ -70,9 +70,14 @@ HAS_SNI = SUBJ_ALT_NAME_SUPPORT # Map from urllib3 to PyOpenSSL compatible parameter-values. _openssl_versions = { ssl.PROTOCOL_SSLv23: OpenSSL.SSL.SSLv23_METHOD, - ssl.PROTOCOL_SSLv3: OpenSSL.SSL.SSLv3_METHOD, ssl.PROTOCOL_TLSv1: OpenSSL.SSL.TLSv1_METHOD, } + +try: + _openssl_versions.update({ssl.PROTOCOL_SSLv3: OpenSSL.SSL.SSLv3_METHOD}) +except AttributeError: + pass + _openssl_verify = { ssl.CERT_NONE: OpenSSL.SSL.VERIFY_NONE, ssl.CERT_OPTIONAL: OpenSSL.SSL.VERIFY_PEER, @@ -186,6 +191,11 @@ class WrappedSocket(object): return b'' else: raise + except OpenSSL.SSL.ZeroReturnError as e: + if self.connection.get_shutdown() == OpenSSL.SSL.RECEIVED_SHUTDOWN: + return b'' + else: + raise except OpenSSL.SSL.WantReadError: rd, wd, ed = select.select( [self.socket], [], [], self.socket.gettimeout()) @@ -199,8 +209,21 @@ class WrappedSocket(object): def settimeout(self, timeout): return self.socket.settimeout(timeout) + def _send_until_done(self, data): + while True: + try: + return self.connection.send(data) + except OpenSSL.SSL.WantWriteError: + _, wlist, _ = select.select([], [self.socket], [], + self.socket.gettimeout()) + if not wlist: + raise timeout() + continue + def sendall(self, data): - return self.connection.sendall(data) + while len(data): + sent = self._send_until_done(data) + data = data[sent:] def close(self): if self._makefile_refs < 1: @@ -248,6 +271,7 @@ def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, ssl_version=None): ctx = OpenSSL.SSL.Context(_openssl_versions[ssl_version]) if certfile: + keyfile = keyfile or certfile # Match 
behaviour of the normal python ssl library ctx.use_certificate_file(certfile) if keyfile: ctx.use_privatekey_file(keyfile) diff --git a/urllib3/exceptions.py b/urllib3/exceptions.py index 7519ba9..0c6fd3c 100644 --- a/urllib3/exceptions.py +++ b/urllib3/exceptions.py @@ -72,11 +72,8 @@ class MaxRetryError(RequestError): def __init__(self, pool, url, reason=None): self.reason = reason - message = "Max retries exceeded with url: %s" % url - if reason: - message += " (Caused by %r)" % reason - else: - message += " (Caused by redirect)" + message = "Max retries exceeded with url: %s (Caused by %r)" % ( + url, reason) RequestError.__init__(self, pool, url, message) @@ -141,6 +138,12 @@ class LocationParseError(LocationValueError): self.location = location +class ResponseError(HTTPError): + "Used as a container for an error reason supplied in a MaxRetryError." + GENERIC_ERROR = 'too many error responses' + SPECIFIC_ERROR = 'too many {status_code} error responses' + + class SecurityWarning(HTTPWarning): "Warned when perfoming security reducing actions" pass diff --git a/urllib3/request.py b/urllib3/request.py index 51fe238..b08d6c9 100644 --- a/urllib3/request.py +++ b/urllib3/request.py @@ -118,18 +118,24 @@ class RequestMethods(object): which is used to compose the body of the request. The random boundary string can be explicitly set with the ``multipart_boundary`` parameter. 
""" - if encode_multipart: - body, content_type = encode_multipart_formdata( - fields or {}, boundary=multipart_boundary) - else: - body, content_type = (urlencode(fields or {}), - 'application/x-www-form-urlencoded') - if headers is None: headers = self.headers - headers_ = {'Content-Type': content_type} - headers_.update(headers) + extra_kw = {'headers': {}} + + if fields: + if 'body' in urlopen_kw: + raise TypeError('request got values for both \'fields\' and \'body\', can only specify one.') + + if encode_multipart: + body, content_type = encode_multipart_formdata(fields, boundary=multipart_boundary) + else: + body, content_type = urlencode(fields), 'application/x-www-form-urlencoded' + + extra_kw['body'] = body + extra_kw['headers'] = {'Content-Type': content_type} + + extra_kw['headers'].update(headers) + extra_kw.update(urlopen_kw) - return self.urlopen(method, url, body=body, headers=headers_, - **urlopen_kw) + return self.urlopen(method, url, **extra_kw) diff --git a/urllib3/util/retry.py b/urllib3/util/retry.py index eb560df..7e0959d 100644 --- a/urllib3/util/retry.py +++ b/urllib3/util/retry.py @@ -2,10 +2,11 @@ import time import logging from ..exceptions import ( - ProtocolError, ConnectTimeoutError, - ReadTimeoutError, MaxRetryError, + ProtocolError, + ReadTimeoutError, + ResponseError, ) from ..packages import six @@ -36,7 +37,6 @@ class Retry(object): Errors will be wrapped in :class:`~urllib3.exceptions.MaxRetryError` unless retries are disabled, in which case the causing exception will be raised. - :param int total: Total number of retries to allow. Takes precedence over other counts. @@ -184,13 +184,13 @@ class Retry(object): return isinstance(err, ConnectTimeoutError) def _is_read_error(self, err): - """ Errors that occur after the request has been started, so we can't - assume that the server did not process any of it. + """ Errors that occur after the request has been started, so we should + assume that the server began processing it. 
""" return isinstance(err, (ReadTimeoutError, ProtocolError)) def is_forced_retry(self, method, status_code): - """ Is this method/response retryable? (Based on method/codes whitelists) + """ Is this method/status code retryable? (Based on method/codes whitelists) """ if self.method_whitelist and method.upper() not in self.method_whitelist: return False @@ -198,8 +198,7 @@ class Retry(object): return self.status_forcelist and status_code in self.status_forcelist def is_exhausted(self): - """ Are we out of retries? - """ + """ Are we out of retries? """ retry_counts = (self.total, self.connect, self.read, self.redirect) retry_counts = list(filter(None, retry_counts)) if not retry_counts: @@ -230,6 +229,7 @@ class Retry(object): connect = self.connect read = self.read redirect = self.redirect + cause = 'unknown' if error and self._is_connection_error(error): # Connect retry? @@ -251,10 +251,16 @@ class Retry(object): # Redirect retry? if redirect is not None: redirect -= 1 + cause = 'too many redirects' else: - # FIXME: Nothing changed, scenario doesn't make sense. 
+ # Incrementing because of a server error like a 500 in + # status_forcelist and a the given method is in the whitelist _observed_errors += 1 + cause = ResponseError.GENERIC_ERROR + if response and response.status: + cause = ResponseError.SPECIFIC_ERROR.format( + status_code=response.status) new_retry = self.new( total=total, @@ -262,7 +268,7 @@ class Retry(object): _observed_errors=_observed_errors) if new_retry.is_exhausted(): - raise MaxRetryError(_pool, url, error) + raise MaxRetryError(_pool, url, error or ResponseError(cause)) log.debug("Incremented Retry for (url='%s'): %r" % (url, new_retry)) diff --git a/urllib3/util/ssl_.py b/urllib3/util/ssl_.py index 9cfe2d2..a788b1b 100644 --- a/urllib3/util/ssl_.py +++ b/urllib3/util/ssl_.py @@ -4,18 +4,84 @@ from hashlib import md5, sha1 from ..exceptions import SSLError -try: # Test for SSL features - SSLContext = None - HAS_SNI = False +SSLContext = None +HAS_SNI = False +create_default_context = None + +import errno +import ssl - import ssl +try: # Test for SSL features from ssl import wrap_socket, CERT_NONE, PROTOCOL_SSLv23 - from ssl import SSLContext # Modern SSL? from ssl import HAS_SNI # Has SNI? except ImportError: pass +try: + from ssl import OP_NO_SSLv2, OP_NO_SSLv3, OP_NO_COMPRESSION +except ImportError: + OP_NO_SSLv2, OP_NO_SSLv3 = 0x1000000, 0x2000000 + OP_NO_COMPRESSION = 0x20000 + +try: + from ssl import _DEFAULT_CIPHERS +except ImportError: + _DEFAULT_CIPHERS = ( + 'ECDH+AESGCM:DH+AESGCM:ECDH+AES256:DH+AES256:ECDH+AES128:DH+AES:ECDH+HIGH:' + 'DH+HIGH:ECDH+3DES:DH+3DES:RSA+AESGCM:RSA+AES:RSA+HIGH:RSA+3DES:ECDH+RC4:' + 'DH+RC4:RSA+RC4:!aNULL:!eNULL:!MD5' + ) + +try: + from ssl import SSLContext # Modern SSL? 
+except ImportError: + import sys + + class SSLContext(object): # Platform-specific: Python 2 & 3.1 + supports_set_ciphers = sys.version_info >= (2, 7) + + def __init__(self, protocol_version): + self.protocol = protocol_version + # Use default values from a real SSLContext + self.check_hostname = False + self.verify_mode = ssl.CERT_NONE + self.ca_certs = None + self.options = 0 + self.certfile = None + self.keyfile = None + self.ciphers = None + + def load_cert_chain(self, certfile, keyfile): + self.certfile = certfile + self.keyfile = keyfile + + def load_verify_locations(self, location): + self.ca_certs = location + + def set_ciphers(self, cipher_suite): + if not self.supports_set_ciphers: + raise TypeError( + 'Your version of Python does not support setting ' + 'a custom cipher suite. Please upgrade to Python ' + '2.7, 3.2, or later if you need this functionality.' + ) + self.ciphers = cipher_suite + + def wrap_socket(self, socket, server_hostname=None): + kwargs = { + 'keyfile': self.keyfile, + 'certfile': self.certfile, + 'ca_certs': self.ca_certs, + 'cert_reqs': self.verify_mode, + 'ssl_version': self.protocol, + } + if self.supports_set_ciphers: # Platform-specific: Python 2.7+ + return wrap_socket(socket, ciphers=self.ciphers, **kwargs) + else: # Platform-specific: Python 2.6 + return wrap_socket(socket, **kwargs) + + def assert_fingerprint(cert, fingerprint): """ Checks if given fingerprint matches the supplied certificate. 
@@ -91,42 +157,98 @@ def resolve_ssl_version(candidate): return candidate -if SSLContext is not None: # Python 3.2+ - def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, - ca_certs=None, server_hostname=None, - ssl_version=None): - """ - All arguments except `server_hostname` have the same meaning as for - :func:`ssl.wrap_socket` - - :param server_hostname: - Hostname of the expected certificate - """ - context = SSLContext(ssl_version) - context.verify_mode = cert_reqs - - # Disable TLS compression to migitate CRIME attack (issue #309) - OP_NO_COMPRESSION = 0x20000 - context.options |= OP_NO_COMPRESSION - - if ca_certs: - try: - context.load_verify_locations(ca_certs) - # Py32 raises IOError - # Py33 raises FileNotFoundError - except Exception as e: # Reraise as SSLError +def create_urllib3_context(ssl_version=None, cert_reqs=ssl.CERT_REQUIRED, + options=None, ciphers=None): + """All arguments have the same meaning as ``ssl_wrap_socket``. + + By default, this function does a lot of the same work that + ``ssl.create_default_context`` does on Python 3.4+. It: + + - Disables SSLv2, SSLv3, and compression + - Sets a restricted set of server ciphers + + If you wish to enable SSLv3, you can do:: + + from urllib3.util import ssl_ + context = ssl_.create_urllib3_context() + context.options &= ~ssl_.OP_NO_SSLv3 + + You can do the same to enable compression (substituting ``COMPRESSION`` + for ``SSLv3`` in the last line above). + + :param ssl_version: + The desired protocol version to use. This will default to + PROTOCOL_SSLv23 which will negotiate the highest protocol that both + the server and your installation of OpenSSL support. + :param cert_reqs: + Whether to require the certificate verification. This defaults to + ``ssl.CERT_REQUIRED``. + :param options: + Specific OpenSSL options. These default to ``ssl.OP_NO_SSLv2``, + ``ssl.OP_NO_SSLv3``, ``ssl.OP_NO_COMPRESSION``. + :param ciphers: + Which cipher suites to allow the server to select. 
+ :returns: + Constructed SSLContext object with specified options + :rtype: SSLContext + """ + context = SSLContext(ssl_version or ssl.PROTOCOL_SSLv23) + + if options is None: + options = 0 + # SSLv2 is easily broken and is considered harmful and dangerous + options |= OP_NO_SSLv2 + # SSLv3 has several problems and is now dangerous + options |= OP_NO_SSLv3 + # Disable compression to prevent CRIME attacks for OpenSSL 1.0+ + # (issue #309) + options |= OP_NO_COMPRESSION + + context.options |= options + + if getattr(context, 'supports_set_ciphers', True): # Platform-specific: Python 2.6 + context.set_ciphers(ciphers or _DEFAULT_CIPHERS) + + context.verify_mode = cert_reqs + if getattr(context, 'check_hostname', None) is not None: # Platform-specific: Python 3.2 + context.check_hostname = (context.verify_mode == ssl.CERT_REQUIRED) + return context + + +def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, + ca_certs=None, server_hostname=None, + ssl_version=None, ciphers=None, ssl_context=None): + """ + All arguments except for server_hostname and ssl_context have the same + meaning as they do when using :func:`ssl.wrap_socket`. + + :param server_hostname: + When SNI is supported, the expected hostname of the certificate + :param ssl_context: + A pre-made :class:`SSLContext` object. If none is provided, one will + be created using :func:`create_urllib3_context`. + :param ciphers: + A string of ciphers we wish the client to support. This is not + supported on Python 2.6 as the ssl module does not support it. 
+ """ + context = ssl_context + if context is None: + context = create_urllib3_context(ssl_version, cert_reqs, + ciphers=ciphers) + + if ca_certs: + try: + context.load_verify_locations(ca_certs) + except IOError as e: # Platform-specific: Python 2.6, 2.7, 3.2 + raise SSLError(e) + # Py33 raises FileNotFoundError which subclasses OSError + # These are not equivalent unless we check the errno attribute + except OSError as e: # Platform-specific: Python 3.3 and beyond + if e.errno == errno.ENOENT: raise SSLError(e) - if certfile: - # FIXME: This block needs a test. - context.load_cert_chain(certfile, keyfile) - if HAS_SNI: # Platform-specific: OpenSSL with enabled SNI - return context.wrap_socket(sock, server_hostname=server_hostname) - return context.wrap_socket(sock) - -else: # Python 3.1 and earlier - def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, - ca_certs=None, server_hostname=None, - ssl_version=None): - return wrap_socket(sock, keyfile=keyfile, certfile=certfile, - ca_certs=ca_certs, cert_reqs=cert_reqs, - ssl_version=ssl_version) + raise + if certfile: + context.load_cert_chain(certfile, keyfile) + if HAS_SNI: # Platform-specific: OpenSSL with enabled SNI + return context.wrap_socket(sock, server_hostname=server_hostname) + return context.wrap_socket(sock) diff --git a/urllib3/util/url.py b/urllib3/util/url.py index 487d456..b2ec834 100644 --- a/urllib3/util/url.py +++ b/urllib3/util/url.py @@ -40,6 +40,48 @@ class Url(namedtuple('Url', url_attrs)): return '%s:%d' % (self.host, self.port) return self.host + @property + def url(self): + """ + Convert self into a url + + This function should more or less round-trip with :func:`.parse_url`. The + returned url may not be exactly the same as the url inputted to + :func:`.parse_url`, but it should be equivalent by the RFC (e.g., urls + with a blank port will have : removed). 
+ + Example: :: + + >>> U = parse_url('http://google.com/mail/') + >>> U.url + 'http://google.com/mail/' + >>> Url('http', 'username:password', 'host.com', 80, + ... '/path', 'query', 'fragment').url + 'http://username:password@host.com:80/path?query#fragment' + """ + scheme, auth, host, port, path, query, fragment = self + url = '' + + # We use "is not None" we want things to happen with empty strings (or 0 port) + if scheme is not None: + url += scheme + '://' + if auth is not None: + url += auth + '@' + if host is not None: + url += host + if port is not None: + url += ':' + str(port) + if path is not None: + url += path + if query is not None: + url += '?' + query + if fragment is not None: + url += '#' + fragment + + return url + + def __str__(self): + return self.url def split_first(s, delims): """ @@ -84,7 +126,7 @@ def parse_url(url): Example:: >>> parse_url('http://google.com/mail/') - Url(scheme='http', host='google.com', port=None, path='/', ...) + Url(scheme='http', host='google.com', port=None, path='/mail/', ...) >>> parse_url('google.com:80') Url(scheme=None, host='google.com', port=80, path=None, ...) >>> parse_url('/foo?bar') @@ -162,7 +204,6 @@ def parse_url(url): return Url(scheme, auth, host, port, path, query, fragment) - def get_host(url): """ Deprecated. Use :func:`.parse_url` instead. -- cgit v1.2.3