From 8c75b24f72cfca10c5346bc26ef536dd2e7e94e3 Mon Sep 17 00:00:00 2001 From: Daniele Tricoli Date: Fri, 9 Nov 2012 02:10:26 +0000 Subject: New upstream release --- debian/changelog | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/debian/changelog b/debian/changelog index 2ac55dd..7616136 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +python-urllib3 (1.5-1) UNRELEASED; urgency=low + + * New upstream release + + -- Daniele Tricoli Fri, 09 Nov 2012 01:22:43 +0100 + python-urllib3 (1.3-3) unstable; urgency=low * debian/control -- cgit v1.2.3 From a1e3ebdf80fa969017f7197933e160b62b132966 Mon Sep 17 00:00:00 2001 From: Daniele Tricoli Date: Fri, 9 Nov 2012 02:21:08 +0000 Subject: Refreshed --- .../01_do-not-use-embedded-python-six.patch | 48 +++++++++++----------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/debian/patches/01_do-not-use-embedded-python-six.patch b/debian/patches/01_do-not-use-embedded-python-six.patch index 6729f1d..111513d 100644 --- a/debian/patches/01_do-not-use-embedded-python-six.patch +++ b/debian/patches/01_do-not-use-embedded-python-six.patch @@ -1,73 +1,73 @@ Description: Do not use embedded copy of python-six. Author: Daniele Tricoli Forwarded: not-needed -Last-Update: 2012-04-17 +Last-Update: 2012-11-09 --- a/test/test_collections.py +++ b/test/test_collections.py @@ -1,7 +1,7 @@ import unittest - + from urllib3._collections import RecentlyUsedContainer as Container -from urllib3.packages import six +import six xrange = six.moves.xrange - - + + --- a/urllib3/connectionpool.py +++ b/urllib3/connectionpool.py -@@ -51,7 +51,7 @@ +@@ -52,7 +52,7 @@ ) - + from .packages.ssl_match_hostname import match_hostname, CertificateError -from .packages import six +import six - - + + xrange = six.moves.xrange --- a/urllib3/filepost.py +++ b/urllib3/filepost.py -@@ -14,8 +14,8 @@ - +@@ -10,8 +10,8 @@ + from uuid import uuid4 from io import BytesIO - + -from .packages import six -from .packages.six import b +import six +from six import b - + writer = codecs.lookup('utf-8')[3] - + --- a/urllib3/response.py +++ b/urllib3/response.py @@ -11,7 +11,7 @@ from io import BytesIO - - from .exceptions import HTTPError + + from .exceptions import DecodeError -from .packages.six import string_types as basestring +from six import string_types as basestring - - + + log = logging.getLogger(__name__) --- a/urllib3/util.py +++ b/urllib3/util.py -@@ -16,7 +16,7 @@ +@@ -18,7 +18,7 @@ except ImportError: # `select` doesn't exist on AppEngine. select = False - + -from .packages import six +import six from .exceptions import LocationParseError - - + + --- a/test/test_filepost.py +++ b/test/test_filepost.py @@ -1,7 +1,7 @@ import unittest - + from urllib3.filepost import encode_multipart_formdata, iter_fields -from urllib3.packages.six import b, u +from six import b, u - - + + BOUNDARY = '!! test boundary !!' 
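
The hunks above hard-switch every import from the vendored ``urllib3.packages.six`` to the system-wide ``six`` shipped as python-six, in line with Debian's policy against embedded code copies. A gentler pattern some downstreams use instead is a fallback import, so the module still runs against an unpatched tree; a minimal sketch (illustrative only, not what this patch does)::

    try:
        import six  # system copy, preferred by Debian policy
    except ImportError:
        from urllib3.packages import six  # vendored copy as a fallback

    xrange = six.moves.xrange
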
-- cgit v1.2.3 From 052e218269d5b318da94b94e36204a4229a61a84 Mon Sep 17 00:00:00 2001 From: Daniele Tricoli Date: Fri, 9 Nov 2012 03:16:33 +0000 Subject: Run tests only for python2.7 since upstream is using assertRaises() as a context manager --- debian/changelog | 7 ++++++- debian/rules | 7 +++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/debian/changelog b/debian/changelog index 7616136..19da4cc 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,8 +1,13 @@ python-urllib3 (1.5-1) UNRELEASED; urgency=low * New upstream release + * debian/patches/01_do-not-use-embedded-python-six.patch + - Refreshed + * debian/rules + - Run tests only for python2.7 since upstream is using + assertRaises() as a context manager - -- Daniele Tricoli Fri, 09 Nov 2012 01:22:43 +0100 + -- Daniele Tricoli Fri, 09 Nov 2012 04:15:52 +0100 python-urllib3 (1.3-3) unstable; urgency=low diff --git a/debian/rules b/debian/rules index 6207fb9..e2ed423 100755 --- a/debian/rules +++ b/debian/rules @@ -43,10 +43,9 @@ override_dh_auto_test: ifeq (,$(filter nocheck,$(DEB_BUILD_OPTIONS))) # Python3 testing is not possible at the moment because missing # dependencies: python3-coverage. - set -ex; \ - for python in $(PYVERS); do \ - $$python /usr/bin/nosetests; \ - done + # Upstream is using a python2.7 features: assertRaises() as a context + # manager + set -ex; python2.7 /usr/bin/nosetests endif override_dh_installchangelogs: -- cgit v1.2.3 From faa58a491ea9da30d0be4300d6485b819431fb74 Mon Sep 17 00:00:00 2001 From: Daniele Tricoli Date: Fri, 9 Nov 2012 03:30:46 +0000 Subject: Bumped Standards-Version to 3.9.4 (no changes needed) --- debian/changelog | 4 +++- debian/control | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/debian/changelog b/debian/changelog index 19da4cc..7237e6d 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,13 +1,15 @@ python-urllib3 (1.5-1) UNRELEASED; urgency=low * New upstream release + * debian/control + - Bumped Standards-Version to 3.9.4 (no changes needed) * debian/patches/01_do-not-use-embedded-python-six.patch - Refreshed * debian/rules - Run tests only for python2.7 since upstream is using assertRaises() as a context manager - -- Daniele Tricoli Fri, 09 Nov 2012 04:15:52 +0100 + -- Daniele Tricoli Fri, 09 Nov 2012 04:23:18 +0100 python-urllib3 (1.3-3) unstable; urgency=low diff --git a/debian/control b/debian/control index 2e522c7..87cd849 100644 --- a/debian/control +++ b/debian/control @@ -12,7 +12,7 @@ Build-Depends: python-six, python-tornado, python3-all -Standards-Version: 3.9.3 +Standards-Version: 3.9.4 X-Python-Version: >= 2.6 X-Python3-Version: >= 3.0 Homepage: http://urllib3.readthedocs.org -- cgit v1.2.3 From 109dfea6d08e36b186782eacaff60c7d8744b364 Mon Sep 17 00:00:00 2001 From: Piotr Ożarowski Date: Sun, 11 Nov 2012 20:57:10 +0000 Subject: upload to experimental due to Wheezy freeze --- debian/changelog | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debian/changelog b/debian/changelog index 7237e6d..8d90785 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,4 +1,4 @@ -python-urllib3 (1.5-1) UNRELEASED; urgency=low +python-urllib3 (1.5-1) experimental; urgency=low * New upstream release * debian/control -- cgit v1.2.3 From 0c183b9d52b45bac22a2ff9db0e6348b655f4ab2 Mon Sep 17 00:00:00 2001 From: SVN-Git Migration Date: Thu, 8 Oct 2015 13:19:30 -0700 Subject: Imported Upstream version 1.2.2 --- CHANGES.rst | 146 +++++ CONTRIBUTORS.txt | 43 ++ LICENSE.txt | 19 + MANIFEST.in | 1 + PKG-INFO | 265 +++++++++ 
README.rst | 97 ++++ dummyserver/__init__.py | 0 dummyserver/handlers.py | 159 ++++++ dummyserver/server.py | 113 ++++ dummyserver/testcase.py | 71 +++ setup.cfg | 10 + setup.py | 53 ++ test-requirements.txt | 2 + test/__init__.py | 0 test/benchmark.py | 77 +++ test/test_collections.py | 111 ++++ test/test_connectionpool.py | 136 +++++ test/test_poolmanager.py | 47 ++ test/test_response.py | 68 +++ urllib3.egg-info/PKG-INFO | 265 +++++++++ urllib3.egg-info/SOURCES.txt | 36 ++ urllib3.egg-info/dependency_links.txt | 1 + urllib3.egg-info/top_level.txt | 2 + urllib3/__init__.py | 48 ++ urllib3/_collections.py | 131 +++++ urllib3/connectionpool.py | 629 +++++++++++++++++++++ urllib3/contrib/__init__.py | 0 urllib3/contrib/ntlmpool.py | 120 ++++ urllib3/exceptions.py | 67 +++ urllib3/filepost.py | 74 +++ urllib3/packages/__init__.py | 4 + .../packages/mimetools_choose_boundary/__init__.py | 47 ++ urllib3/packages/six.py | 372 ++++++++++++ urllib3/packages/ssl_match_hostname/__init__.py | 61 ++ urllib3/poolmanager.py | 138 +++++ urllib3/request.py | 147 +++++ urllib3/response.py | 191 +++++++ 37 files changed, 3751 insertions(+) create mode 100644 CHANGES.rst create mode 100644 CONTRIBUTORS.txt create mode 100644 LICENSE.txt create mode 100644 MANIFEST.in create mode 100644 PKG-INFO create mode 100644 README.rst create mode 100644 dummyserver/__init__.py create mode 100644 dummyserver/handlers.py create mode 100755 dummyserver/server.py create mode 100644 dummyserver/testcase.py create mode 100644 setup.cfg create mode 100644 setup.py create mode 100644 test-requirements.txt create mode 100644 test/__init__.py create mode 100644 test/benchmark.py create mode 100644 test/test_collections.py create mode 100644 test/test_connectionpool.py create mode 100644 test/test_poolmanager.py create mode 100644 test/test_response.py create mode 100644 urllib3.egg-info/PKG-INFO create mode 100644 urllib3.egg-info/SOURCES.txt create mode 100644 urllib3.egg-info/dependency_links.txt create mode 100644 urllib3.egg-info/top_level.txt create mode 100644 urllib3/__init__.py create mode 100644 urllib3/_collections.py create mode 100644 urllib3/connectionpool.py create mode 100644 urllib3/contrib/__init__.py create mode 100644 urllib3/contrib/ntlmpool.py create mode 100644 urllib3/exceptions.py create mode 100644 urllib3/filepost.py create mode 100644 urllib3/packages/__init__.py create mode 100644 urllib3/packages/mimetools_choose_boundary/__init__.py create mode 100644 urllib3/packages/six.py create mode 100644 urllib3/packages/ssl_match_hostname/__init__.py create mode 100644 urllib3/poolmanager.py create mode 100644 urllib3/request.py create mode 100644 urllib3/response.py diff --git a/CHANGES.rst b/CHANGES.rst new file mode 100644 index 0000000..d998db8 --- /dev/null +++ b/CHANGES.rst @@ -0,0 +1,146 @@ +Changes +======= + + +1.2.2 (2012-02-06) +++++++++++++++++++ + +* Fixed packaging bug of not shipping ``test-requirements.txt``. (Issue #47) + + +1.2.1 (2012-02-05) +++++++++++++++++++ + +* Fixed another bug related to when ``ssl`` module is not available. (Issue #41) + +* Location parsing errors now raise ``urllib3.exceptions.LocationParseError`` + which inherits from ``ValueError``. + + +1.2 (2012-01-29) +++++++++++++++++ + +* Added Python 3 support (tested on 3.2.2) + +* Dropped Python 2.5 support (tested on 2.6.7, 2.7.2) + +* Use ``select.poll`` instead of ``select.select`` for platforms that support + it. + +* Use ``Queue.LifoQueue`` instead of ``Queue.Queue`` for more aggressive + connection reusing. 
Configurable by overriding ``ConnectionPool.QueueCls``. + +* Fixed ``ImportError`` during install when ``ssl`` module is not available. + (Issue #41) + +* Fixed ``PoolManager`` redirects between schemes (such as HTTP -> HTTPS) not + completing properly. (Issue #28, uncovered by Issue #10 in v1.1) + +* Ported ``dummyserver`` to use ``tornado`` instead of ``webob`` + + ``eventlet``. Removed extraneous unsupported dummyserver testing backends. + Added socket-level tests. + +* More tests. Achievement Unlocked: 99% Coverage. + + +1.1 (2012-01-07) +++++++++++++++++ + +* Refactored ``dummyserver`` to its own root namespace module (used for + testing). + +* Added hostname verification for ``VerifiedHTTPSConnection`` by vendoring in + Py32's ``ssl_match_hostname``. (Issue #25) + +* Fixed cross-host HTTP redirects when using ``PoolManager``. (Issue #10) + +* Fixed ``decode_content`` being ignored when set through ``urlopen``. (Issue + #27) + +* Fixed timeout-related bugs. (Issues #17, #23) + + +1.0.2 (2011-11-04) +++++++++++++++++++ + +* Fixed typo in ``VerifiedHTTPSConnection`` which would only present as a bug if + you're using the object manually. (Thanks pyos) + +* Made RecentlyUsedContainer (and consequently PoolManager) more thread-safe by + wrapping the access log in a mutex. (Thanks @christer) + +* Made RecentlyUsedContainer more dict-like (corrected ``__delitem__`` and + ``__getitem__`` behaviour), with tests. Shouldn't affect core urllib3 code. + + +1.0.1 (2011-10-10) +++++++++++++++++++ + +* Fixed a bug where the same connection would get returned into the pool twice, + causing extraneous "HttpConnectionPool is full" log warnings. + + +1.0 (2011-10-08) +++++++++++++++++ + +* Added ``PoolManager`` with LRU expiration of connections (tested and + documented). +* Added ``ProxyManager`` (needs tests, docs, and confirmation that it works + with HTTPS proxies). +* Added optional partial-read support for responses when + ``preload_content=False``. You can now make requests and just read the headers + without loading the content. +* Made response decoding optional (default on, same as before). +* Added optional explicit boundary string for ``encode_multipart_formdata``. +* Convenience request methods are now inherited from ``RequestMethods``. Old + helpers like ``get_url`` and ``post_url`` should be abandoned in favour of + the new ``request(method, url, ...)``. +* Refactored code to be even more decoupled, reusable, and extendable. +* License header added to ``.py`` files. +* Embiggened the documentation: Lots of Sphinx-friendly docstrings in the code + and docs in ``docs/`` and on urllib3.readthedocs.org. +* Embettered all the things! +* Started writing this file. + + +0.4.1 (2011-07-17) +++++++++++++++++++ + +* Minor bug fixes, code cleanup. + + +0.4 (2011-03-01) +++++++++++++++++ + +* Better unicode support. +* Added ``VerifiedHTTPSConnection``. +* Added ``NTLMConnectionPool`` in contrib. +* Minor improvements. + + +0.3.1 (2010-07-13) +++++++++++++++++++ + +* Added ``assert_host_name`` optional parameter. Now compatible with proxies. + + +0.3 (2009-12-10) +++++++++++++++++ + +* Added HTTPS support. +* Minor bug fixes. +* Refactored, broken backwards compatibility with 0.2. +* API to be treated as stable from this version forward. + + +0.2 (2008-11-17) +++++++++++++++++ + +* Added unit tests. +* Bug fixes. + + +0.1 (2008-11-16) +++++++++++++++++ + +* First release. 
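
Worth pausing on the 1.2 entry about ``Queue.LifoQueue`` above: handing out the most recently returned connection first keeps a small set of sockets warm instead of rotating through every idle one, which is what "more aggressive connection reusing" means here. A toy illustration of the ordering difference (standalone sketch, not urllib3 code)::

    from queue import Queue, LifoQueue  # stdlib; the module is named Queue on Python 2

    fifo, lifo = Queue(), LifoQueue()
    for conn in ('a', 'b', 'c'):   # pretend these are pooled connections
        fifo.put(conn)
        lifo.put(conn)

    assert fifo.get() == 'a'  # FIFO: the coldest connection comes back first
    assert lifo.get() == 'c'  # LIFO: the warmest connection is reused first
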
diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt new file mode 100644 index 0000000..37140ca --- /dev/null +++ b/CONTRIBUTORS.txt @@ -0,0 +1,43 @@ +# Contributions to the urllib3 project + +## Creator & Maintainer + +* Andrey Petrov + + +## Contributors + +In chronological order: + +* victor.vde + * HTTPS patch (which inspired HTTPSConnectionPool) + +* erikcederstrand + * NTLM-authenticated HTTPSConnectionPool + * Basic-authenticated HTTPSConnectionPool (merged into make_headers) + +* niphlod + * Client-verified SSL certificates for HTTPSConnectionPool + * Response gzip and deflate encoding support + * Better unicode support for filepost using StringIO buffers + +* btoconnor + * Non-multipart encoding for POST requests + +* p.dobrogost + * Code review, PEP8 compliance, benchmark fix + +* kennethreitz + * Bugfixes, suggestions, Requests integration + +* georgemarshall + * Bugfixes, Improvements and Test coverage + +* Thomas Kluyver + * Python 3 support + +* brandon-rhodes + * Design review, bugfixes, test coverage. + +* [Your name or handle] <[email or website]> + * [Brief summary of your changes] diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 0000000..f658ad6 --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,19 @@ +This is the MIT license: http://www.opensource.org/licenses/mit-license.php + +Copyright 2008-2011 Andrey Petrov and contributors (see CONTRIBUTORS.txt) + +Permission is hereby granted, free of charge, to any person obtaining a copy of this +software and associated documentation files (the "Software"), to deal in the Software +without restriction, including without limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or +substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE +FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..d1abae2 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1 @@ +include README.rst CHANGES.rst LICENSE.txt CONTRIBUTORS.txt test-requirements.txt diff --git a/PKG-INFO b/PKG-INFO new file mode 100644 index 0000000..2f0ab55 --- /dev/null +++ b/PKG-INFO @@ -0,0 +1,265 @@ +Metadata-Version: 1.0 +Name: urllib3 +Version: 1.2.2 +Summary: HTTP library with thread-safe connection pooling, file post, and more. +Home-page: http://urllib3.readthedocs.org/ +Author: Andrey Petrov +Author-email: andrey.petrov@shazow.net +License: MIT +Description: Highlights + ========== + + - Re-use the same socket connection for multiple requests + (``HTTPConnectionPool`` and ``HTTPSConnectionPool``) + (with optional client-side certificate verification). + - File posting (``encode_multipart_formdata``). + - Built-in redirection and retries (optional). + - Supports gzip and deflate decoding. + - Thread-safe and sanity-safe. + - Tested on Python 2.6+ and Python 3.2+, 99% unit test coverage. 
+ - Small and easy to understand codebase perfect for extending and building upon. + For a more comprehensive solution, have a look at + `Requests `_ which is also powered by urllib3. + + What's wrong with urllib and urllib2? + ===================================== + + There are two critical features missing from the Python standard library: + Connection re-using/pooling and file posting. It's not terribly hard to + implement these yourself, but it's much easier to use a module that already + did the work for you. + + The Python standard libraries ``urllib`` and ``urllib2`` have little to do + with each other. They were designed to be independent and standalone, each + solving a different scope of problems, and ``urllib3`` follows in a similar + vein. + + Why do I want to reuse connections? + =================================== + + Performance. When you normally do a urllib call, a separate socket + connection is created with each request. By reusing existing sockets + (supported since HTTP 1.1), the requests will take up less resources on the + server's end, and also provide a faster response time at the client's end. + With some simple benchmarks (see `test/benchmark.py + `_ + ), downloading 15 URLs from google.com is about twice as fast when using + HTTPConnectionPool (which uses 1 connection) than using plain urllib (which + uses 15 connections). + + This library is perfect for: + + - Talking to an API + - Crawling a website + - Any situation where being able to post files, handle redirection, and + retrying is useful. It's relatively lightweight, so it can be used for + anything! + + Examples + ======== + + Go to `urllib3.readthedocs.org `_ + for more nice syntax-highlighted examples. + + But, long story short:: + + import urllib3 + + http = urllib3.PoolManager() + + r = http.request('GET', 'http://google.com/') + + print r.status, r.data + + The ``PoolManager`` will take care of reusing connections for you whenever + you request the same host. For more fine-grained control of your connection + pools, you should look at + `ConnectionPool `_. + + + Run the tests + ============= + + We use some external dependencies to run the urllib3 test suite. Easiest way to + run the tests is thusly from the urllib3 source root: :: + + $ pip install -r test-requirements.txt + $ nosetests + ..................................................... + + Success! You could also ``pip install coverage`` to get code coverage reporting. + + + Contributing + ============ + + #. `Check for open issues `_ or open + a fresh issue to start a discussion around a feature idea or a bug. There is + a *Contributor Friendly* tag for issues that should be ideal for people who + are not very familiar with the codebase yet. + #. Fork the `urllib3 repository on Github `_ + to start making your changes. + #. Write a test which shows that the bug was fixed or that the feature works + as expected. + #. Send a pull request and bug the maintainer until it gets merged and published. + :) Make sure to add yourself to ``CONTRIBUTORS.txt``. + + + Changes + ======= + + + 1.2.2 (2012-02-06) + ++++++++++++++++++ + + * Fixed packaging bug of not shipping ``test-requirements.txt``. (Issue #47) + + + 1.2.1 (2012-02-05) + ++++++++++++++++++ + + * Fixed another bug related to when ``ssl`` module is not available. (Issue #41) + + * Location parsing errors now raise ``urllib3.exceptions.LocationParseError`` + which inherits from ``ValueError``. 
+ + + 1.2 (2012-01-29) + ++++++++++++++++ + + * Added Python 3 support (tested on 3.2.2) + + * Dropped Python 2.5 support (tested on 2.6.7, 2.7.2) + + * Use ``select.poll`` instead of ``select.select`` for platforms that support + it. + + * Use ``Queue.LifoQueue`` instead of ``Queue.Queue`` for more aggressive + connection reusing. Configurable by overriding ``ConnectionPool.QueueCls``. + + * Fixed ``ImportError`` during install when ``ssl`` module is not available. + (Issue #41) + + * Fixed ``PoolManager`` redirects between schemes (such as HTTP -> HTTPS) not + completing properly. (Issue #28, uncovered by Issue #10 in v1.1) + + * Ported ``dummyserver`` to use ``tornado`` instead of ``webob`` + + ``eventlet``. Removed extraneous unsupported dummyserver testing backends. + Added socket-level tests. + + * More tests. Achievement Unlocked: 99% Coverage. + + + 1.1 (2012-01-07) + ++++++++++++++++ + + * Refactored ``dummyserver`` to its own root namespace module (used for + testing). + + * Added hostname verification for ``VerifiedHTTPSConnection`` by vendoring in + Py32's ``ssl_match_hostname``. (Issue #25) + + * Fixed cross-host HTTP redirects when using ``PoolManager``. (Issue #10) + + * Fixed ``decode_content`` being ignored when set through ``urlopen``. (Issue + #27) + + * Fixed timeout-related bugs. (Issues #17, #23) + + + 1.0.2 (2011-11-04) + ++++++++++++++++++ + + * Fixed typo in ``VerifiedHTTPSConnection`` which would only present as a bug if + you're using the object manually. (Thanks pyos) + + * Made RecentlyUsedContainer (and consequently PoolManager) more thread-safe by + wrapping the access log in a mutex. (Thanks @christer) + + * Made RecentlyUsedContainer more dict-like (corrected ``__delitem__`` and + ``__getitem__`` behaviour), with tests. Shouldn't affect core urllib3 code. + + + 1.0.1 (2011-10-10) + ++++++++++++++++++ + + * Fixed a bug where the same connection would get returned into the pool twice, + causing extraneous "HttpConnectionPool is full" log warnings. + + + 1.0 (2011-10-08) + ++++++++++++++++ + + * Added ``PoolManager`` with LRU expiration of connections (tested and + documented). + * Added ``ProxyManager`` (needs tests, docs, and confirmation that it works + with HTTPS proxies). + * Added optional partial-read support for responses when + ``preload_content=False``. You can now make requests and just read the headers + without loading the content. + * Made response decoding optional (default on, same as before). + * Added optional explicit boundary string for ``encode_multipart_formdata``. + * Convenience request methods are now inherited from ``RequestMethods``. Old + helpers like ``get_url`` and ``post_url`` should be abandoned in favour of + the new ``request(method, url, ...)``. + * Refactored code to be even more decoupled, reusable, and extendable. + * License header added to ``.py`` files. + * Embiggened the documentation: Lots of Sphinx-friendly docstrings in the code + and docs in ``docs/`` and on urllib3.readthedocs.org. + * Embettered all the things! + * Started writing this file. + + + 0.4.1 (2011-07-17) + ++++++++++++++++++ + + * Minor bug fixes, code cleanup. + + + 0.4 (2011-03-01) + ++++++++++++++++ + + * Better unicode support. + * Added ``VerifiedHTTPSConnection``. + * Added ``NTLMConnectionPool`` in contrib. + * Minor improvements. + + + 0.3.1 (2010-07-13) + ++++++++++++++++++ + + * Added ``assert_host_name`` optional parameter. Now compatible with proxies. + + + 0.3 (2009-12-10) + ++++++++++++++++ + + * Added HTTPS support. + * Minor bug fixes. 
+ * Refactored, broken backwards compatibility with 0.2. + * API to be treated as stable from this version forward. + + + 0.2 (2008-11-17) + ++++++++++++++++ + + * Added unit tests. + * Bug fixes. + + + 0.1 (2008-11-16) + ++++++++++++++++ + + * First release. + +Keywords: urllib httplib threadsafe filepost http https ssl pooling +Platform: UNKNOWN +Classifier: Environment :: Web Environment +Classifier: Intended Audience :: Developers +Classifier: License :: OSI Approved :: MIT License +Classifier: Operating System :: OS Independent +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 2 +Classifier: Programming Language :: Python :: 3 +Classifier: Topic :: Internet :: WWW/HTTP +Classifier: Topic :: Software Development :: Libraries diff --git a/README.rst b/README.rst new file mode 100644 index 0000000..b376c81 --- /dev/null +++ b/README.rst @@ -0,0 +1,97 @@ +Highlights +========== + +- Re-use the same socket connection for multiple requests + (``HTTPConnectionPool`` and ``HTTPSConnectionPool``) + (with optional client-side certificate verification). +- File posting (``encode_multipart_formdata``). +- Built-in redirection and retries (optional). +- Supports gzip and deflate decoding. +- Thread-safe and sanity-safe. +- Tested on Python 2.6+ and Python 3.2+, 99% unit test coverage. +- Small and easy to understand codebase perfect for extending and building upon. + For a more comprehensive solution, have a look at + `Requests `_ which is also powered by urllib3. + +What's wrong with urllib and urllib2? +===================================== + +There are two critical features missing from the Python standard library: +Connection re-using/pooling and file posting. It's not terribly hard to +implement these yourself, but it's much easier to use a module that already +did the work for you. + +The Python standard libraries ``urllib`` and ``urllib2`` have little to do +with each other. They were designed to be independent and standalone, each +solving a different scope of problems, and ``urllib3`` follows in a similar +vein. + +Why do I want to reuse connections? +=================================== + +Performance. When you normally do a urllib call, a separate socket +connection is created with each request. By reusing existing sockets +(supported since HTTP 1.1), the requests will take up less resources on the +server's end, and also provide a faster response time at the client's end. +With some simple benchmarks (see `test/benchmark.py +`_ +), downloading 15 URLs from google.com is about twice as fast when using +HTTPConnectionPool (which uses 1 connection) than using plain urllib (which +uses 15 connections). + +This library is perfect for: + +- Talking to an API +- Crawling a website +- Any situation where being able to post files, handle redirection, and + retrying is useful. It's relatively lightweight, so it can be used for + anything! + +Examples +======== + +Go to `urllib3.readthedocs.org `_ +for more nice syntax-highlighted examples. + +But, long story short:: + + import urllib3 + + http = urllib3.PoolManager() + + r = http.request('GET', 'http://google.com/') + + print r.status, r.data + +The ``PoolManager`` will take care of reusing connections for you whenever +you request the same host. For more fine-grained control of your connection +pools, you should look at +`ConnectionPool `_. + + +Run the tests +============= + +We use some external dependencies to run the urllib3 test suite. 
Easiest way to +run the tests is thusly from the urllib3 source root: :: + + $ pip install -r test-requirements.txt + $ nosetests + ..................................................... + +Success! You could also ``pip install coverage`` to get code coverage reporting. + + +Contributing +============ + +#. `Check for open issues `_ or open + a fresh issue to start a discussion around a feature idea or a bug. There is + a *Contributor Friendly* tag for issues that should be ideal for people who + are not very familiar with the codebase yet. +#. Fork the `urllib3 repository on Github `_ + to start making your changes. +#. Write a test which shows that the bug was fixed or that the feature works + as expected. +#. Send a pull request and bug the maintainer until it gets merged and published. + :) Make sure to add yourself to ``CONTRIBUTORS.txt``. diff --git a/dummyserver/__init__.py b/dummyserver/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/dummyserver/handlers.py b/dummyserver/handlers.py new file mode 100644 index 0000000..3e32881 --- /dev/null +++ b/dummyserver/handlers.py @@ -0,0 +1,159 @@ +from __future__ import print_function + +import gzip +import logging +import sys +import time +import zlib + +from io import BytesIO +from tornado.wsgi import HTTPRequest + +try: + from urllib.parse import urlsplit +except ImportError: + from urlparse import urlsplit + +log = logging.getLogger(__name__) + + +class Response(object): + def __init__(self, body='', status='200 OK', headers=None): + if not isinstance(body, bytes): + body = body.encode('utf8') + + self.body = body + self.status = status + self.headers = headers or [("Content-type", "text/plain")] + + def __call__(self, environ, start_response): + start_response(self.status, self.headers) + return [self.body] + + +class WSGIHandler(object): + pass + + +class TestingApp(WSGIHandler): + """ + Simple app that performs various operations, useful for testing an HTTP + library. + + Given any path, it will attempt to convert it will load a corresponding + local method if it exists. Status code 200 indicates success, 400 indicates + failure. Each method has its own conditions for success/failure. + """ + def __call__(self, environ, start_response): + req = HTTPRequest(environ) + + req.params = {} + for k, v in req.arguments.items(): + req.params[k] = next(iter(v)) + + path = req.path[:] + if not path.startswith('/'): + path = urlsplit(path).path + + target = path[1:].replace('/', '_') + method = getattr(self, target, self.index) + resp = method(req) + + if dict(resp.headers).get('Connection') == 'close': + # FIXME: Can we kill the connection somehow? 
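+            # Under WSGI the server owns the socket, so the application
+            # cannot close it directly; the 'Connection: close' header is
+            # only a hint that the server may or may not honour.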
+ pass + + return resp(environ, start_response) + + def index(self, _request): + "Render simple message" + return Response("Dummy server!") + + def set_up(self, request): + test_type = request.params.get('test_type') + test_id = request.params.get('test_id') + if test_id: + print('\nNew test %s: %s' % (test_type, test_id)) + else: + print('\nNew test %s' % test_type) + return Response("Dummy server is ready!") + + def specific_method(self, request): + "Confirm that the request matches the desired method type" + method = request.params.get('method') + if method and not isinstance(method, str): + method = method.decode('utf8') + + if request.method != method: + return Response("Wrong method: %s != %s" % + (method, request.method), status='400') + return Response() + + def upload(self, request): + "Confirm that the uploaded file conforms to specification" + # FIXME: This is a huge broken mess + param = request.params.get('upload_param', 'myfile').decode('ascii') + filename = request.params.get('upload_filename', '').decode('utf-8') + size = int(request.params.get('upload_size', '0')) + files_ = request.files.get(param) + + if len(files_) != 1: + return Response("Expected 1 file for '%s', not %d" %(param, len(files_)), + status='400') + file_ = files_[0] + + data = file_['body'] + if int(size) != len(data): + return Response("Wrong size: %d != %d" % + (size, len(data)), status='400') + + if filename != file_['filename']: + return Response("Wrong filename: %s != %s" % + (filename, file_.filename), status='400') + + return Response() + + def redirect(self, request): + "Perform a redirect to ``target``" + target = request.params.get('target', '/') + headers = [('Location', target)] + return Response(status='303', headers=headers) + + def keepalive(self, request): + if request.params.get('close', '0') == '1': + headers = [('Connection', 'close')] + return Response('Closing', headers=headers) + + headers = [('Connection', 'keep-alive')] + return Response('Keeping alive', headers=headers) + + def sleep(self, request): + "Sleep for a specified amount of ``seconds``" + seconds = float(request.params.get('seconds', '1')) + time.sleep(seconds) + return Response() + + def echo(self, request): + "Echo back the params" + if request.method == 'GET': + return Response(request.query) + + return Response(request.body) + + def encodingrequest(self, request): + "Check for UA accepting gzip/deflate encoding" + data = b"hello, world!" + encoding = request.headers.get('Accept-Encoding', '') + headers = None + if 'gzip' in encoding: + headers = [('Content-Encoding', 'gzip')] + file_ = BytesIO() + gzip.GzipFile('', mode='w', fileobj=file_).write(data) + data = file_.getvalue() + elif 'deflate' in encoding: + headers = [('Content-Encoding', 'deflate')] + data = zlib.compress(data) + return Response(data, headers=headers) + + def shutdown(self, request): + sys.exit() diff --git a/dummyserver/server.py b/dummyserver/server.py new file mode 100755 index 0000000..529850f --- /dev/null +++ b/dummyserver/server.py @@ -0,0 +1,113 @@ +#!/usr/bin/env python + +""" +Dummy server used for unit testing. 
+""" +from __future__ import print_function + +import logging +import os +import sys +import threading +import socket + +import tornado.wsgi +import tornado.httpserver +import tornado.ioloop + +from dummyserver.handlers import TestingApp + + +log = logging.getLogger(__name__) + +CERTS_PATH = os.path.join(os.path.dirname(__file__), 'certs') +DEFAULT_CERTS = { + 'certfile': os.path.join(CERTS_PATH, 'server.crt'), + 'keyfile': os.path.join(CERTS_PATH, 'server.key'), +} +DEFAULT_CA = os.path.join(CERTS_PATH, 'cacert.pem') +DEFAULT_CA_BAD = os.path.join(CERTS_PATH, 'client_bad.pem') + + +# Different types of servers we have: + + +class SocketServerThread(threading.Thread): + """ + :param socket_handler: Callable which receives a socket argument for one + request. + :param ready_lock: Lock which gets released when the socket handler is + ready to receive requests. + """ + def __init__(self, socket_handler, host='localhost', port=8081, + ready_lock=None): + threading.Thread.__init__(self) + + self.socket_handler = socket_handler + self.host = host + self.port = port + self.ready_lock = ready_lock + + def _start_server(self): + sock = socket.socket() + sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + sock.bind((self.host, self.port)) + + # Once listen() returns, the server socket is ready + sock.listen(1) + + if self.ready_lock: + self.ready_lock.release() + + self.socket_handler(sock) + + def run(self): + self.server = self._start_server() + + +class TornadoServerThread(threading.Thread): + def __init__(self, host='localhost', port=8081, scheme='http', certs=None): + threading.Thread.__init__(self) + + self.host = host + self.port = port + self.scheme = scheme + self.certs = certs + + def _start_server(self): + container = tornado.wsgi.WSGIContainer(TestingApp()) + + if self.scheme == 'https': + http_server = tornado.httpserver.HTTPServer(container, + ssl_options=self.certs) + else: + http_server = tornado.httpserver.HTTPServer(container) + + http_server.listen(self.port) + return http_server + + def run(self): + self.server = self._start_server() + self.ioloop = tornado.ioloop.IOLoop.instance() + self.ioloop.start() + + def stop(self): + self.server.stop() + self.ioloop.stop() + + +if __name__ == '__main__': + log.setLevel(logging.DEBUG) + log.addHandler(logging.StreamHandler(sys.stderr)) + + from urllib3 import get_host + + url = "http://localhost:8081" + if len(sys.argv) > 1: + url = sys.argv[1] + + print("Starting WGI server at: %s" % url) + + scheme, host, port = get_host(url) + t = TornadoServerThread(scheme=scheme, host=host, port=port) + t.start() diff --git a/dummyserver/testcase.py b/dummyserver/testcase.py new file mode 100644 index 0000000..518d739 --- /dev/null +++ b/dummyserver/testcase.py @@ -0,0 +1,71 @@ +import unittest + +from threading import Lock + +from dummyserver.server import ( + TornadoServerThread, SocketServerThread, + DEFAULT_CERTS, +) + + +# TODO: Change ports to auto-allocated? + + +class SocketDummyServerTestCase(unittest.TestCase): + """ + A simple socket-based server is created for this class that is good for + exactly one request. 
+ """ + scheme = 'http' + host = 'localhost' + port = 18080 + + @classmethod + def _start_server(cls, socket_handler): + ready_lock = Lock() + ready_lock.acquire() + cls.server_thread = SocketServerThread(socket_handler=socket_handler, + ready_lock=ready_lock, + host=cls.host, port=cls.port) + cls.server_thread.start() + + # Lock gets released by thread above + ready_lock.acquire() + + +class HTTPDummyServerTestCase(unittest.TestCase): + scheme = 'http' + host = 'localhost' + host_alt = '127.0.0.1' # Some tests need two hosts + port = 18081 + certs = DEFAULT_CERTS + + @classmethod + def _start_server(cls): + cls.server_thread = TornadoServerThread(host=cls.host, port=cls.port, + scheme=cls.scheme, + certs=cls.certs) + cls.server_thread.start() + + # TODO: Loop-check here instead + import time + time.sleep(0.1) + + @classmethod + def _stop_server(cls): + cls.server_thread.stop() + + @classmethod + def setUpClass(cls): + cls._start_server() + + @classmethod + def tearDownClass(cls): + cls._stop_server() + + +class HTTPSDummyServerTestCase(HTTPDummyServerTestCase): + scheme = 'https' + host = 'localhost' + port = 18082 + certs = DEFAULT_CERTS diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..58ce3f5 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,10 @@ +[nosetests] +logging-clear-handlers = true +with-coverage = true +cover-package = urllib3 + +[egg_info] +tag_build = +tag_date = 0 +tag_svn_revision = 0 + diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..f093f34 --- /dev/null +++ b/setup.py @@ -0,0 +1,53 @@ +#!/usr/bin/env python + +from distutils.core import setup + +import os +import re + +try: + import setuptools +except ImportError: + pass # No 'develop' command, oh well. + +base_path = os.path.dirname(__file__) + +# Get the version (borrowed from SQLAlchemy) +fp = open(os.path.join(base_path, 'urllib3', '__init__.py')) +VERSION = re.compile(r".*__version__ = '(.*?)'", + re.S).match(fp.read()).group(1) +fp.close() + + +version = VERSION + +requirements = [] +tests_requirements = requirements + open('test-requirements.txt').readlines() + +setup(name='urllib3', + version=version, + description="HTTP library with thread-safe connection pooling, file post, and more.", + long_description=open('README.rst').read() + '\n\n' + open('CHANGES.rst').read(), + classifiers=[ + 'Environment :: Web Environment', + 'Intended Audience :: Developers', + 'License :: OSI Approved :: MIT License', + 'Operating System :: OS Independent', + 'Programming Language :: Python', + 'Programming Language :: Python :: 2', + 'Programming Language :: Python :: 3', + 'Topic :: Internet :: WWW/HTTP', + 'Topic :: Software Development :: Libraries', + ], + keywords='urllib httplib threadsafe filepost http https ssl pooling', + author='Andrey Petrov', + author_email='andrey.petrov@shazow.net', + url='http://urllib3.readthedocs.org/', + license='MIT', + packages=['urllib3', 'dummyserver', 'urllib3.packages', + 'urllib3.packages.ssl_match_hostname', 'urllib3.packages.mimetools_choose_boundary', + ], + requires=requirements, + tests_require=tests_requirements, + test_suite='test', + ) diff --git a/test-requirements.txt b/test-requirements.txt new file mode 100644 index 0000000..568b0d4 --- /dev/null +++ b/test-requirements.txt @@ -0,0 +1,2 @@ +nose +tornado diff --git a/test/__init__.py b/test/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test/benchmark.py b/test/benchmark.py new file mode 100644 index 0000000..e7049c4 --- /dev/null +++ b/test/benchmark.py @@ -0,0 +1,77 @@ 
+#!/usr/bin/env python + +""" +Really simple rudimentary benchmark to compare ConnectionPool versus standard +urllib to demonstrate the usefulness of connection re-using. +""" +from __future__ import print_function + +import sys +import time +import urllib + +sys.path.append('../') +import urllib3 + + +# URLs to download. Doesn't matter as long as they're from the same host, so we +# can take advantage of connection re-using. +TO_DOWNLOAD = [ + 'http://code.google.com/apis/apps/', + 'http://code.google.com/apis/base/', + 'http://code.google.com/apis/blogger/', + 'http://code.google.com/apis/calendar/', + 'http://code.google.com/apis/codesearch/', + 'http://code.google.com/apis/contact/', + 'http://code.google.com/apis/books/', + 'http://code.google.com/apis/documents/', + 'http://code.google.com/apis/finance/', + 'http://code.google.com/apis/health/', + 'http://code.google.com/apis/notebook/', + 'http://code.google.com/apis/picasaweb/', + 'http://code.google.com/apis/spreadsheets/', + 'http://code.google.com/apis/webmastertools/', + 'http://code.google.com/apis/youtube/', +] + + +def urllib_get(url_list): + assert url_list + for url in url_list: + now = time.time() + r = urllib.urlopen(url) + elapsed = time.time() - now + print("Got in %0.3f: %s" % (elapsed, url)) + + +def pool_get(url_list): + assert url_list + pool = urllib3.connection_from_url(url_list[0]) + for url in url_list: + now = time.time() + r = pool.get_url(url) + elapsed = time.time() - now + print("Got in %0.3fs: %s" % (elapsed, url)) + + +if __name__ == '__main__': + print("Running pool_get ...") + now = time.time() + pool_get(TO_DOWNLOAD) + pool_elapsed = time.time() - now + + print("Running urllib_get ...") + now = time.time() + urllib_get(TO_DOWNLOAD) + urllib_elapsed = time.time() - now + + print("Completed pool_get in %0.3fs" % pool_elapsed) + print("Completed urllib_get in %0.3fs" % urllib_elapsed) + + +""" +Example results: + +Completed pool_get in 1.163s +Completed urllib_get in 2.318s +""" diff --git a/test/test_collections.py b/test/test_collections.py new file mode 100644 index 0000000..f8275e0 --- /dev/null +++ b/test/test_collections.py @@ -0,0 +1,111 @@ +import unittest + +from urllib3._collections import RecentlyUsedContainer as Container +from urllib3.packages import six +xrange = six.moves.xrange + +class TestLRUContainer(unittest.TestCase): + def test_maxsize(self): + d = Container(5) + + for i in xrange(5): + d[i] = str(i) + + self.assertEqual(len(d), 5) + + for i in xrange(5): + self.assertEqual(d[i], str(i)) + + d[i+1] = str(i+1) + + self.assertEqual(len(d), 5) + self.assertFalse(0 in d) + self.assertTrue(i+1 in d) + + def test_expire(self): + d = Container(5) + + for i in xrange(5): + d[i] = str(i) + + for i in xrange(5): + d.get(0) + + # Add one more entry + d[5] = '5' + + # Check state + self.assertEqual(list(d.keys()), [0, 2, 3, 4, 5]) + + def test_pruning(self): + d = Container(5) + + for i in xrange(5): + d[i] = str(i) + + # Contend 2 entries for the most-used slot to balloon the heap + for i in xrange(100): + d.get(i % 2) + + self.assertTrue(len(d.access_log) <= d.CLEANUP_FACTOR * d._maxsize) + + def test_same_key(self): + d = Container(5) + + for i in xrange(10): + d['foo'] = i + + self.assertEqual(list(d.keys()), ['foo']) + + d._prune_invalidated_entries() + + self.assertEqual(len(d.access_log), 1) + + def test_access_ordering(self): + d = Container(5) + + for i in xrange(10): + d[i] = True + + self.assertEqual(d._get_ordered_access_keys(), [9,8,7,6,5]) + + new_order = [7,8,6,9,5] + for k in 
reversed(new_order): + d[k] + + self.assertEqual(d._get_ordered_access_keys(), new_order) + + def test_delete(self): + d = Container(5) + + for i in xrange(5): + d[i] = True + + del d[0] + self.assertFalse(0 in d) + + d.pop(1) + self.assertFalse(1 in d) + + d.pop(1, None) + + def test_get(self): + d = Container(5) + + for i in xrange(5): + d[i] = True + + r = d.get(4) + self.assertEqual(r, True) + + r = d.get(5) + self.assertEqual(r, None) + + r = d.get(5, 42) + self.assertEqual(r, 42) + + self.assertRaises(KeyError, lambda: d[5]) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_connectionpool.py b/test/test_connectionpool.py new file mode 100644 index 0000000..4281d42 --- /dev/null +++ b/test/test_connectionpool.py @@ -0,0 +1,136 @@ +import unittest + +from urllib3.connectionpool import ( + connection_from_url, + get_host, + HTTPConnectionPool, + make_headers) + +from urllib3.exceptions import EmptyPoolError, LocationParseError + + +class TestConnectionPool(unittest.TestCase): + def test_get_host(self): + url_host_map = { + 'http://google.com/mail': ('http', 'google.com', None), + 'http://google.com/mail/': ('http', 'google.com', None), + 'google.com/mail': ('http', 'google.com', None), + 'http://google.com/': ('http', 'google.com', None), + 'http://google.com': ('http', 'google.com', None), + 'http://www.google.com': ('http', 'www.google.com', None), + 'http://mail.google.com': ('http', 'mail.google.com', None), + 'http://google.com:8000/mail/': ('http', 'google.com', 8000), + 'http://google.com:8000': ('http', 'google.com', 8000), + 'https://google.com': ('https', 'google.com', None), + 'https://google.com:8000': ('https', 'google.com', 8000), + 'http://user:password@127.0.0.1:1234': ('http', '127.0.0.1', 1234), + } + for url, expected_host in url_host_map.items(): + returned_host = get_host(url) + self.assertEquals(returned_host, expected_host) + + def test_same_host(self): + same_host = [ + ('http://google.com/', '/'), + ('http://google.com/', 'http://google.com/'), + ('http://google.com/', 'http://google.com'), + ('http://google.com/', 'http://google.com/abra/cadabra'), + ('http://google.com:42/', 'http://google.com:42/abracadabra'), + ] + + for a, b in same_host: + c = connection_from_url(a) + self.assertTrue(c.is_same_host(b), "%s =? %s" % (a, b)) + + not_same_host = [ + ('https://google.com/', 'http://google.com/'), + ('http://google.com/', 'https://google.com/'), + ('http://yahoo.com/', 'http://google.com/'), + ('http://google.com:42', 'https://google.com/abracadabra'), + ('http://google.com', 'https://google.net/'), + ] + + for a, b in not_same_host: + c = connection_from_url(a) + self.assertFalse(c.is_same_host(b), "%s =? 
%s" % (a, b)) + + def test_invalid_host(self): + # TODO: Add more tests + invalid_host = [ + 'http://google.com:foo', + ] + + for location in invalid_host: + self.assertRaises(LocationParseError, get_host, location) + + + def test_make_headers(self): + self.assertEqual( + make_headers(accept_encoding=True), + {'accept-encoding': 'gzip,deflate'}) + + self.assertEqual( + make_headers(accept_encoding='foo,bar'), + {'accept-encoding': 'foo,bar'}) + + self.assertEqual( + make_headers(accept_encoding=['foo', 'bar']), + {'accept-encoding': 'foo,bar'}) + + self.assertEqual( + make_headers(accept_encoding=True, user_agent='banana'), + {'accept-encoding': 'gzip,deflate', 'user-agent': 'banana'}) + + self.assertEqual( + make_headers(user_agent='banana'), + {'user-agent': 'banana'}) + + self.assertEqual( + make_headers(keep_alive=True), + {'connection': 'keep-alive'}) + + self.assertEqual( + make_headers(basic_auth='foo:bar'), + {'authorization': 'Basic Zm9vOmJhcg=='}) + + def test_max_connections(self): + pool = HTTPConnectionPool(host='localhost', maxsize=1, block=True) + + pool._get_conn(timeout=0.01) + + try: + pool._get_conn(timeout=0.01) + self.fail("Managed to get a connection without EmptyPoolError") + except EmptyPoolError: + pass + + try: + pool.get_url('/', pool_timeout=0.01) + self.fail("Managed to get a connection without EmptyPoolError") + except EmptyPoolError: + pass + + self.assertEqual(pool.num_connections, 1) + + def test_pool_edgecases(self): + pool = HTTPConnectionPool(host='localhost', maxsize=1, block=False) + + conn1 = pool._get_conn() + conn2 = pool._get_conn() # New because block=False + + pool._put_conn(conn1) + pool._put_conn(conn2) # Should be discarded + + self.assertEqual(conn1, pool._get_conn()) + self.assertNotEqual(conn2, pool._get_conn()) + + self.assertEqual(pool.num_connections, 3) + + def test_exception_str(self): + self.assertEqual( + str(EmptyPoolError(HTTPConnectionPool(host='localhost'), "Test.")), + "HTTPConnectionPool(host='localhost', port=None): Test.") + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_poolmanager.py b/test/test_poolmanager.py new file mode 100644 index 0000000..12722f7 --- /dev/null +++ b/test/test_poolmanager.py @@ -0,0 +1,47 @@ +import unittest + +from urllib3.poolmanager import PoolManager +from urllib3 import connection_from_url + + +class TestPoolManager(unittest.TestCase): + def test_same_url(self): + # Convince ourselves that normally we don't get the same object + conn1 = connection_from_url('http://localhost:8081/foo') + conn2 = connection_from_url('http://localhost:8081/bar') + + self.assertNotEqual(conn1, conn2) + + # Now try again using the PoolManager + p = PoolManager(1) + + conn1 = p.connection_from_url('http://localhost:8081/foo') + conn2 = p.connection_from_url('http://localhost:8081/bar') + + self.assertEqual(conn1, conn2) + + def test_many_urls(self): + urls = [ + "http://localhost:8081/foo", + "http://www.google.com/mail", + "http://localhost:8081/bar", + "https://www.google.com/", + "https://www.google.com/mail", + "http://yahoo.com", + "http://bing.com", + "http://yahoo.com/", + ] + + connections = set() + + p = PoolManager(10) + + for url in urls: + conn = p.connection_from_url(url) + connections.add(conn) + + self.assertEqual(len(connections), 5) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_response.py b/test/test_response.py new file mode 100644 index 0000000..0ef379c --- /dev/null +++ b/test/test_response.py @@ -0,0 +1,68 @@ +import unittest +import zlib + +from 
io import BytesIO + +from urllib3.response import HTTPResponse + +class TestLegacyResponse(unittest.TestCase): + def test_getheaders(self): + headers = {'host': 'example.com'} + r = HTTPResponse(headers=headers) + self.assertEqual(r.getheaders(), headers) + + def test_getheader(self): + headers = {'host': 'example.com'} + r = HTTPResponse(headers=headers) + self.assertEqual(r.getheader('host'), 'example.com') + + +class TestResponse(unittest.TestCase): + def test_cache_content(self): + r = HTTPResponse('foo') + self.assertEqual(r.data, 'foo') + self.assertEqual(r._body, 'foo') + + def test_default(self): + r = HTTPResponse() + self.assertEqual(r.data, None) + + def test_none(self): + r = HTTPResponse(None) + self.assertEqual(r.data, None) + + def test_preload(self): + fp = BytesIO(b'foo') + + r = HTTPResponse(fp, preload_content=True) + + self.assertEqual(fp.tell(), len(b'foo')) + self.assertEqual(r.data, b'foo') + + def test_no_preload(self): + fp = BytesIO(b'foo') + + r = HTTPResponse(fp, preload_content=False) + + self.assertEqual(fp.tell(), 0) + self.assertEqual(r.data, b'foo') + self.assertEqual(fp.tell(), len(b'foo')) + + def test_decode_bad_data(self): + fp = BytesIO(b'\x00' * 10) + self.assertRaises(zlib.error, HTTPResponse, fp, headers={ + 'content-encoding': 'deflate' + }) + + def test_decode_deflate(self): + import zlib + data = zlib.compress(b'foo') + + fp = BytesIO(data) + r = HTTPResponse(fp, headers={'content-encoding': 'deflate'}) + + self.assertEqual(r.data, b'foo') + + +if __name__ == '__main__': + unittest.main() diff --git a/urllib3.egg-info/PKG-INFO b/urllib3.egg-info/PKG-INFO new file mode 100644 index 0000000..2f0ab55 --- /dev/null +++ b/urllib3.egg-info/PKG-INFO @@ -0,0 +1,265 @@ +Metadata-Version: 1.0 +Name: urllib3 +Version: 1.2.2 +Summary: HTTP library with thread-safe connection pooling, file post, and more. +Home-page: http://urllib3.readthedocs.org/ +Author: Andrey Petrov +Author-email: andrey.petrov@shazow.net +License: MIT +Description: Highlights + ========== + + - Re-use the same socket connection for multiple requests + (``HTTPConnectionPool`` and ``HTTPSConnectionPool``) + (with optional client-side certificate verification). + - File posting (``encode_multipart_formdata``). + - Built-in redirection and retries (optional). + - Supports gzip and deflate decoding. + - Thread-safe and sanity-safe. + - Tested on Python 2.6+ and Python 3.2+, 99% unit test coverage. + - Small and easy to understand codebase perfect for extending and building upon. + For a more comprehensive solution, have a look at + `Requests `_ which is also powered by urllib3. + + What's wrong with urllib and urllib2? + ===================================== + + There are two critical features missing from the Python standard library: + Connection re-using/pooling and file posting. It's not terribly hard to + implement these yourself, but it's much easier to use a module that already + did the work for you. + + The Python standard libraries ``urllib`` and ``urllib2`` have little to do + with each other. They were designed to be independent and standalone, each + solving a different scope of problems, and ``urllib3`` follows in a similar + vein. + + Why do I want to reuse connections? + =================================== + + Performance. When you normally do a urllib call, a separate socket + connection is created with each request. 
By reusing existing sockets + (supported since HTTP 1.1), the requests will take up less resources on the + server's end, and also provide a faster response time at the client's end. + With some simple benchmarks (see `test/benchmark.py + `_ + ), downloading 15 URLs from google.com is about twice as fast when using + HTTPConnectionPool (which uses 1 connection) than using plain urllib (which + uses 15 connections). + + This library is perfect for: + + - Talking to an API + - Crawling a website + - Any situation where being able to post files, handle redirection, and + retrying is useful. It's relatively lightweight, so it can be used for + anything! + + Examples + ======== + + Go to `urllib3.readthedocs.org `_ + for more nice syntax-highlighted examples. + + But, long story short:: + + import urllib3 + + http = urllib3.PoolManager() + + r = http.request('GET', 'http://google.com/') + + print r.status, r.data + + The ``PoolManager`` will take care of reusing connections for you whenever + you request the same host. For more fine-grained control of your connection + pools, you should look at + `ConnectionPool `_. + + + Run the tests + ============= + + We use some external dependencies to run the urllib3 test suite. Easiest way to + run the tests is thusly from the urllib3 source root: :: + + $ pip install -r test-requirements.txt + $ nosetests + ..................................................... + + Success! You could also ``pip install coverage`` to get code coverage reporting. + + + Contributing + ============ + + #. `Check for open issues `_ or open + a fresh issue to start a discussion around a feature idea or a bug. There is + a *Contributor Friendly* tag for issues that should be ideal for people who + are not very familiar with the codebase yet. + #. Fork the `urllib3 repository on Github `_ + to start making your changes. + #. Write a test which shows that the bug was fixed or that the feature works + as expected. + #. Send a pull request and bug the maintainer until it gets merged and published. + :) Make sure to add yourself to ``CONTRIBUTORS.txt``. + + + Changes + ======= + + + 1.2.2 (2012-02-06) + ++++++++++++++++++ + + * Fixed packaging bug of not shipping ``test-requirements.txt``. (Issue #47) + + + 1.2.1 (2012-02-05) + ++++++++++++++++++ + + * Fixed another bug related to when ``ssl`` module is not available. (Issue #41) + + * Location parsing errors now raise ``urllib3.exceptions.LocationParseError`` + which inherits from ``ValueError``. + + + 1.2 (2012-01-29) + ++++++++++++++++ + + * Added Python 3 support (tested on 3.2.2) + + * Dropped Python 2.5 support (tested on 2.6.7, 2.7.2) + + * Use ``select.poll`` instead of ``select.select`` for platforms that support + it. + + * Use ``Queue.LifoQueue`` instead of ``Queue.Queue`` for more aggressive + connection reusing. Configurable by overriding ``ConnectionPool.QueueCls``. + + * Fixed ``ImportError`` during install when ``ssl`` module is not available. + (Issue #41) + + * Fixed ``PoolManager`` redirects between schemes (such as HTTP -> HTTPS) not + completing properly. (Issue #28, uncovered by Issue #10 in v1.1) + + * Ported ``dummyserver`` to use ``tornado`` instead of ``webob`` + + ``eventlet``. Removed extraneous unsupported dummyserver testing backends. + Added socket-level tests. + + * More tests. Achievement Unlocked: 99% Coverage. + + + 1.1 (2012-01-07) + ++++++++++++++++ + + * Refactored ``dummyserver`` to its own root namespace module (used for + testing). 
+ + * Added hostname verification for ``VerifiedHTTPSConnection`` by vendoring in + Py32's ``ssl_match_hostname``. (Issue #25) + + * Fixed cross-host HTTP redirects when using ``PoolManager``. (Issue #10) + + * Fixed ``decode_content`` being ignored when set through ``urlopen``. (Issue + #27) + + * Fixed timeout-related bugs. (Issues #17, #23) + + + 1.0.2 (2011-11-04) + ++++++++++++++++++ + + * Fixed typo in ``VerifiedHTTPSConnection`` which would only present as a bug if + you're using the object manually. (Thanks pyos) + + * Made RecentlyUsedContainer (and consequently PoolManager) more thread-safe by + wrapping the access log in a mutex. (Thanks @christer) + + * Made RecentlyUsedContainer more dict-like (corrected ``__delitem__`` and + ``__getitem__`` behaviour), with tests. Shouldn't affect core urllib3 code. + + + 1.0.1 (2011-10-10) + ++++++++++++++++++ + + * Fixed a bug where the same connection would get returned into the pool twice, + causing extraneous "HttpConnectionPool is full" log warnings. + + + 1.0 (2011-10-08) + ++++++++++++++++ + + * Added ``PoolManager`` with LRU expiration of connections (tested and + documented). + * Added ``ProxyManager`` (needs tests, docs, and confirmation that it works + with HTTPS proxies). + * Added optional partial-read support for responses when + ``preload_content=False``. You can now make requests and just read the headers + without loading the content. + * Made response decoding optional (default on, same as before). + * Added optional explicit boundary string for ``encode_multipart_formdata``. + * Convenience request methods are now inherited from ``RequestMethods``. Old + helpers like ``get_url`` and ``post_url`` should be abandoned in favour of + the new ``request(method, url, ...)``. + * Refactored code to be even more decoupled, reusable, and extendable. + * License header added to ``.py`` files. + * Embiggened the documentation: Lots of Sphinx-friendly docstrings in the code + and docs in ``docs/`` and on urllib3.readthedocs.org. + * Embettered all the things! + * Started writing this file. + + + 0.4.1 (2011-07-17) + ++++++++++++++++++ + + * Minor bug fixes, code cleanup. + + + 0.4 (2011-03-01) + ++++++++++++++++ + + * Better unicode support. + * Added ``VerifiedHTTPSConnection``. + * Added ``NTLMConnectionPool`` in contrib. + * Minor improvements. + + + 0.3.1 (2010-07-13) + ++++++++++++++++++ + + * Added ``assert_host_name`` optional parameter. Now compatible with proxies. + + + 0.3 (2009-12-10) + ++++++++++++++++ + + * Added HTTPS support. + * Minor bug fixes. + * Refactored, broken backwards compatibility with 0.2. + * API to be treated as stable from this version forward. + + + 0.2 (2008-11-17) + ++++++++++++++++ + + * Added unit tests. + * Bug fixes. + + + 0.1 (2008-11-16) + ++++++++++++++++ + + * First release. 
+ +Keywords: urllib httplib threadsafe filepost http https ssl pooling +Platform: UNKNOWN +Classifier: Environment :: Web Environment +Classifier: Intended Audience :: Developers +Classifier: License :: OSI Approved :: MIT License +Classifier: Operating System :: OS Independent +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 2 +Classifier: Programming Language :: Python :: 3 +Classifier: Topic :: Internet :: WWW/HTTP +Classifier: Topic :: Software Development :: Libraries diff --git a/urllib3.egg-info/SOURCES.txt b/urllib3.egg-info/SOURCES.txt new file mode 100644 index 0000000..d7cbb3d --- /dev/null +++ b/urllib3.egg-info/SOURCES.txt @@ -0,0 +1,36 @@ +CHANGES.rst +CONTRIBUTORS.txt +LICENSE.txt +MANIFEST.in +README.rst +setup.cfg +setup.py +test-requirements.txt +dummyserver/__init__.py +dummyserver/handlers.py +dummyserver/server.py +dummyserver/testcase.py +test/__init__.py +test/benchmark.py +test/test_collections.py +test/test_connectionpool.py +test/test_poolmanager.py +test/test_response.py +urllib3/__init__.py +urllib3/_collections.py +urllib3/connectionpool.py +urllib3/exceptions.py +urllib3/filepost.py +urllib3/poolmanager.py +urllib3/request.py +urllib3/response.py +urllib3.egg-info/PKG-INFO +urllib3.egg-info/SOURCES.txt +urllib3.egg-info/dependency_links.txt +urllib3.egg-info/top_level.txt +urllib3/contrib/__init__.py +urllib3/contrib/ntlmpool.py +urllib3/packages/__init__.py +urllib3/packages/six.py +urllib3/packages/mimetools_choose_boundary/__init__.py +urllib3/packages/ssl_match_hostname/__init__.py \ No newline at end of file diff --git a/urllib3.egg-info/dependency_links.txt b/urllib3.egg-info/dependency_links.txt new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/urllib3.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/urllib3.egg-info/top_level.txt b/urllib3.egg-info/top_level.txt new file mode 100644 index 0000000..93675d9 --- /dev/null +++ b/urllib3.egg-info/top_level.txt @@ -0,0 +1,2 @@ +urllib3 +dummyserver diff --git a/urllib3/__init__.py b/urllib3/__init__.py new file mode 100644 index 0000000..2e9c663 --- /dev/null +++ b/urllib3/__init__.py @@ -0,0 +1,48 @@ +# urllib3/__init__.py +# Copyright 2008-2012 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# +# This module is part of urllib3 and is released under +# the MIT License: http://www.opensource.org/licenses/mit-license.php + +""" +urllib3 - Thread-safe connection pooling and re-using. +""" + +__author__ = 'Andrey Petrov (andrey.petrov@shazow.net)' +__license__ = 'MIT' +__version__ = '1.2.2' + + +from .connectionpool import ( + HTTPConnectionPool, + HTTPSConnectionPool, + connection_from_url, + get_host, + make_headers) + + +from .exceptions import ( + HTTPError, + MaxRetryError, + SSLError, + TimeoutError) + +from .poolmanager import PoolManager, ProxyManager, proxy_from_url +from .response import HTTPResponse +from .filepost import encode_multipart_formdata + + +# Set default logging handler to avoid "No handler found" warnings. +import logging +try: + from logging import NullHandler +except ImportError: + class NullHandler(logging.Handler): + def emit(self, record): + pass + +logging.getLogger(__name__).addHandler(NullHandler()) + +# ... Clean up. 
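+# ``logging`` and ``NullHandler`` were only needed to install the default
+# handler above; delete them so they are not exported from the package.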
+del logging
+del NullHandler
diff --git a/urllib3/_collections.py b/urllib3/_collections.py
new file mode 100644
index 0000000..3cef081
--- /dev/null
+++ b/urllib3/_collections.py
@@ -0,0 +1,131 @@
+# urllib3/_collections.py
+# Copyright 2008-2012 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
+#
+# This module is part of urllib3 and is released under
+# the MIT License: http://www.opensource.org/licenses/mit-license.php
+
+from collections import deque
+
+from threading import RLock
+
+__all__ = ['RecentlyUsedContainer']
+
+
+class AccessEntry(object):
+    __slots__ = ('key', 'is_valid')
+
+    def __init__(self, key, is_valid=True):
+        self.key = key
+        self.is_valid = is_valid
+
+
+class RecentlyUsedContainer(dict):
+    """
+    Provides a dict-like container that maintains up to ``maxsize`` keys
+    while throwing away the least-recently-used keys beyond ``maxsize``.
+    """
+
+    # If len(self.access_log) exceeds self._maxsize * CLEANUP_FACTOR, then we
+    # will attempt to clean up the invalidated entries in the access_log
+    # data structure during the next 'get' operation.
+    CLEANUP_FACTOR = 10
+
+    def __init__(self, maxsize=10):
+        self._maxsize = maxsize
+
+        self._container = {}
+
+        # We use a deque to store our keys ordered by the last access.
+        self.access_log = deque()
+        self.access_log_lock = RLock()
+
+        # We look up the access log entry by the key to invalidate it so we can
+        # insert a new authoritative entry at the head without having to dig and
+        # find the old entry for removal immediately.
+        self.access_lookup = {}
+
+        # Trigger a heap cleanup when we get past this size
+        self.access_log_limit = maxsize * self.CLEANUP_FACTOR
+
+    def _invalidate_entry(self, key):
+        "If it exists: Invalidate the old entry and return it."
+        old_entry = self.access_lookup.get(key)
+        if old_entry:
+            old_entry.is_valid = False
+
+        return old_entry
+
+    def _push_entry(self, key):
+        "Push entry onto our access log, invalidate the old entry if it exists."
+        self._invalidate_entry(key)
+
+        new_entry = AccessEntry(key)
+        self.access_lookup[key] = new_entry
+
+        self.access_log_lock.acquire()
+        self.access_log.appendleft(new_entry)
+        self.access_log_lock.release()
+
+    def _prune_entries(self, num):
+        "Pop entries from our access log until we've popped ``num`` valid ones."
+        while num > 0:
+            self.access_log_lock.acquire()
+            p = self.access_log.pop()
+            self.access_log_lock.release()
+
+            if not p.is_valid:
+                continue  # Invalidated entry, skip
+
+            dict.pop(self, p.key, None)
+            self.access_lookup.pop(p.key, None)
+            num -= 1
+
+    def _prune_invalidated_entries(self):
+        "Rebuild our access_log without the invalidated entries."
+        self.access_log_lock.acquire()
+        self.access_log = deque(e for e in self.access_log if e.is_valid)
+        self.access_log_lock.release()
+
+    def _get_ordered_access_keys(self):
+        "Return ordered access keys for inspection. Used for testing."
+        self.access_log_lock.acquire()
+        r = [e.key for e in self.access_log if e.is_valid]
+        self.access_log_lock.release()
+
+        return r
+
+    def __getitem__(self, key):
+        item = dict.get(self, key)
+
+        if not item:
+            raise KeyError(key)
+
+        # Insert new entry with new high priority, also implicitly invalidates
+        # the old entry.
+        self._push_entry(key)
+
+        if len(self.access_log) > self.access_log_limit:
+            # Heap is getting too big, try to clean up any trailing invalidated
+            # entries.
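+            # _prune_invalidated_entries() rebuilds the deque in one pass,
+            # keeping only entries still marked valid (relative order kept).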
+ self._prune_invalidated_entries() + + return item + + def __setitem__(self, key, item): + # Add item to our container and access log + dict.__setitem__(self, key, item) + self._push_entry(key) + + # Discard invalid and excess entries + self._prune_entries(len(self) - self._maxsize) + + def __delitem__(self, key): + self._invalidate_entry(key) + self.access_lookup.pop(key, None) + dict.__delitem__(self, key) + + def get(self, key, default=None): + try: + return self[key] + except KeyError: + return default diff --git a/urllib3/connectionpool.py b/urllib3/connectionpool.py new file mode 100644 index 0000000..39e652e --- /dev/null +++ b/urllib3/connectionpool.py @@ -0,0 +1,629 @@ +# urllib3/connectionpool.py +# Copyright 2008-2012 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# +# This module is part of urllib3 and is released under +# the MIT License: http://www.opensource.org/licenses/mit-license.php + +import logging +import socket + +from base64 import b64encode +from socket import error as SocketError, timeout as SocketTimeout + +try: + from select import poll, POLLIN +except ImportError: # Doesn't exist on OSX and other platforms + from select import select + poll = False + +try: # Python 3 + from http.client import HTTPConnection, HTTPException + from http.client import HTTP_PORT, HTTPS_PORT +except ImportError: + from httplib import HTTPConnection, HTTPException + from httplib import HTTP_PORT, HTTPS_PORT + +try: # Python 3 + from queue import LifoQueue, Empty, Full +except ImportError: + from Queue import LifoQueue, Empty, Full + + +try: # Compiled with SSL? + HTTPSConnection = object + BaseSSLError = None + ssl = None + + try: # Python 3 + from http.client import HTTPSConnection + except ImportError: + from httplib import HTTPSConnection + + import ssl + BaseSSLError = ssl.SSLError + +except ImportError: + pass + + +from .packages.ssl_match_hostname import match_hostname, CertificateError +from .request import RequestMethods +from .response import HTTPResponse +from .exceptions import ( + EmptyPoolError, + HostChangedError, + LocationParseError, + MaxRetryError, + SSLError, + TimeoutError, +) + +from .packages.ssl_match_hostname import match_hostname, CertificateError +from .packages import six + +xrange = six.moves.xrange + +log = logging.getLogger(__name__) + +_Default = object() + +port_by_scheme = { + 'http': HTTP_PORT, + 'https': HTTPS_PORT, +} + +## Connection objects (extension of httplib) + +class VerifiedHTTPSConnection(HTTPSConnection): + """ + Based on httplib.HTTPSConnection but wraps the socket with + SSL certification. 
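+    Hostname verification against the certificate is only performed when
+    ``ca_certs`` is supplied to ``set_cert()`` (see ``connect()`` below).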
+ """ + cert_reqs = None + ca_certs = None + + def set_cert(self, key_file=None, cert_file=None, + cert_reqs='CERT_NONE', ca_certs=None): + ssl_req_scheme = { + 'CERT_NONE': ssl.CERT_NONE, + 'CERT_OPTIONAL': ssl.CERT_OPTIONAL, + 'CERT_REQUIRED': ssl.CERT_REQUIRED + } + + self.key_file = key_file + self.cert_file = cert_file + self.cert_reqs = ssl_req_scheme.get(cert_reqs) or ssl.CERT_NONE + self.ca_certs = ca_certs + + def connect(self): + # Add certificate verification + sock = socket.create_connection((self.host, self.port), self.timeout) + + # Wrap socket using verification with the root certs in + # trusted_root_certs + self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, + cert_reqs=self.cert_reqs, + ca_certs=self.ca_certs) + if self.ca_certs: + match_hostname(self.sock.getpeercert(), self.host) + +## Pool objects + +class ConnectionPool(object): + """ + Base class for all connection pools, such as + :class:`.HTTPConnectionPool` and :class:`.HTTPSConnectionPool`. + """ + + scheme = None + QueueCls = LifoQueue + + def __init__(self, host, port=None): + self.host = host + self.port = port + + def __str__(self): + return '%s(host=%r, port=%r)' % (type(self).__name__, + self.host, self.port) + + +class HTTPConnectionPool(ConnectionPool, RequestMethods): + """ + Thread-safe connection pool for one host. + + :param host: + Host used for this HTTP Connection (e.g. "localhost"), passed into + :class:`httplib.HTTPConnection`. + + :param port: + Port used for this HTTP Connection (None is equivalent to 80), passed + into :class:`httplib.HTTPConnection`. + + :param strict: + Causes BadStatusLine to be raised if the status line can't be parsed + as a valid HTTP/1.0 or 1.1 status line, passed into + :class:`httplib.HTTPConnection`. + + :param timeout: + Socket timeout for each individual connection, can be a float. None + disables timeout. + + :param maxsize: + Number of connections to save that can be reused. More than 1 is useful + in multithreaded situations. If ``block`` is set to false, more + connections will be created but they will not be saved once they've + been used. + + :param block: + If set to True, no more than ``maxsize`` connections will be used at + a time. When no free connections are available, the call will block + until a connection has been released. This is a useful side effect for + particular multithreaded situations where one does not want to use more + than maxsize connections per host to prevent flooding. + + :param headers: + Headers to include with all requests, unless other headers are given + explicitly. + """ + + scheme = 'http' + + def __init__(self, host, port=None, strict=False, timeout=None, maxsize=1, + block=False, headers=None): + super(HTTPConnectionPool, self).__init__(host, port) + + self.strict = strict + self.timeout = timeout + self.pool = self.QueueCls(maxsize) + self.block = block + self.headers = headers or {} + + # Fill the queue up so that doing get() on it will block properly + for _ in xrange(maxsize): + self.pool.put(None) + + # These are mostly for testing and debugging purposes. + self.num_connections = 0 + self.num_requests = 0 + + def _new_conn(self): + """ + Return a fresh :class:`httplib.HTTPConnection`. + """ + self.num_connections += 1 + log.info("Starting new HTTP connection (%d): %s" % + (self.num_connections, self.host)) + return HTTPConnection(host=self.host, port=self.port) + + def _get_conn(self, timeout=None): + """ + Get a connection. Will return a pooled connection if one is available. 
+ + If no connections are available and :prop:`.block` is ``False``, then a + fresh connection is returned. + + :param timeout: + Seconds to wait before giving up and raising + :class:`urllib3.exceptions.EmptyPoolError` if the pool is empty and + :prop:`.block` is ``True``. + """ + conn = None + try: + conn = self.pool.get(block=self.block, timeout=timeout) + + # If this is a persistent connection, check if it got disconnected + if conn and conn.sock and is_connection_dropped(conn): + log.info("Resetting dropped connection: %s" % self.host) + conn.close() + + except Empty: + if self.block: + raise EmptyPoolError(self, + "Pool reached maximum size and no more " + "connections are allowed.") + pass # Oh well, we'll create a new connection then + + return conn or self._new_conn() + + def _put_conn(self, conn): + """ + Put a connection back into the pool. + + :param conn: + Connection object for the current host and port as returned by + :meth:`._new_conn` or :meth:`._get_conn`. + + If the pool is already full, the connection is discarded because we + exceeded maxsize. If connections are discarded frequently, then maxsize + should be increased. + """ + try: + self.pool.put(conn, block=False) + except Full: + # This should never happen if self.block == True + log.warning("HttpConnectionPool is full, discarding connection: %s" + % self.host) + + def _make_request(self, conn, method, url, timeout=_Default, + **httplib_request_kw): + """ + Perform a request on a given httplib connection object taken from our + pool. + """ + self.num_requests += 1 + + if timeout is _Default: + timeout = self.timeout + + conn.timeout = timeout # This only does anything in Py26+ + + conn.request(method, url, **httplib_request_kw) + conn.sock.settimeout(timeout) + httplib_response = conn.getresponse() + + log.debug("\"%s %s %s\" %s %s" % + (method, url, + conn._http_vsn_str, # pylint: disable-msg=W0212 + httplib_response.status, httplib_response.length)) + + return httplib_response + + + def is_same_host(self, url): + """ + Check if the given ``url`` is a member of the same host as this + connection pool. + """ + # TODO: Add optional support for socket.gethostbyname checking. + scheme, host, port = get_host(url) + + if self.port and not port: + # Use explicit default port for comparison when none is given. + port = port_by_scheme.get(scheme) + + return (url.startswith('/') or + (scheme, host, port) == (self.scheme, self.host, self.port)) + + def urlopen(self, method, url, body=None, headers=None, retries=3, + redirect=True, assert_same_host=True, timeout=_Default, + pool_timeout=None, release_conn=None, **response_kw): + """ + Get a connection from the pool and perform an HTTP request. This is the + lowest level call for making a request, so you'll need to specify all + the raw details. + + .. note:: + + More commonly, it's appropriate to use a convenience method provided + by :class:`.RequestMethods`, such as :meth:`.request`. + + .. note:: + + `release_conn` will only behave as expected if + `preload_content=False` because we want to make + `preload_content=False` the default behaviour someday soon without + breaking backwards compatibility. + + :param method: + HTTP request method (such as GET, POST, PUT, etc.) + + :param body: + Data to send in the request body (useful for creating + POST requests, see HTTPConnectionPool.post_url for + more convenience). + + :param headers: + Dictionary of custom headers to send, such as User-Agent, + If-None-Match, etc. If None, pool headers are used. 
If provided, + these headers completely replace any pool-specific headers. + + :param retries: + Number of retries to allow before raising a MaxRetryError exception. + + :param redirect: + Automatically handle redirects (status codes 301, 302, 303, 307), + each redirect counts as a retry. + + :param assert_same_host: + If ``True``, will make sure that the host of the pool requests is + consistent else will raise HostChangedError. When False, you can + use the pool on an HTTP proxy and request foreign hosts. + + :param timeout: + If specified, overrides the default timeout for this one request. + + :param pool_timeout: + If set and the pool is set to block=True, then this method will + block for ``pool_timeout`` seconds and raise EmptyPoolError if no + connection is available within the time period. + + :param release_conn: + If False, then the urlopen call will not release the connection + back into the pool once a response is received (but will release if + you read the entire contents of the response such as when + `preload_content=True`). This is useful if you're not preloading + the response's content immediately. You will need to call + ``r.release_conn()`` on the response ``r`` to return the connection + back into the pool. If None, it takes the value of + ``response_kw.get('preload_content', True)``. + + :param \**response_kw: + Additional parameters are passed to + :meth:`urllib3.response.HTTPResponse.from_httplib` + """ + if headers is None: + headers = self.headers + + if retries < 0: + raise MaxRetryError(self, url) + + if timeout is _Default: + timeout = self.timeout + + if release_conn is None: + release_conn = response_kw.get('preload_content', True) + + # Check host + if assert_same_host and not self.is_same_host(url): + host = "%s://%s" % (self.scheme, self.host) + if self.port: + host = "%s:%d" % (host, self.port) + + raise HostChangedError(self, url, retries - 1) + + conn = None + + try: + # Request a connection from the queue + # (Could raise SocketError: Bad file descriptor) + conn = self._get_conn(timeout=pool_timeout) + + # Make the request on the httplib connection object + httplib_response = self._make_request(conn, method, url, + timeout=timeout, + body=body, headers=headers) + + # If we're going to release the connection in ``finally:``, then + # the request doesn't need to know about the connection. Otherwise + # it will also try to release it and we'll have a double-release + # mess. + response_conn = not release_conn and conn + + # Import httplib's response into our own wrapper object + response = HTTPResponse.from_httplib(httplib_response, + pool=self, + connection=response_conn, + **response_kw) + + # else: + # The connection will be put back into the pool when + # ``response.release_conn()`` is called (implicitly by + # ``response.read()``) + + except Empty as e: + # Timed out by queue + raise TimeoutError(self, "Request timed out. (pool_timeout=%s)" % + pool_timeout) + + except SocketTimeout as e: + # Timed out by socket + raise TimeoutError(self, "Request timed out. (timeout=%s)" % + timeout) + + except BaseSSLError as e: + # SSL certificate error + raise SSLError(e) + + except CertificateError as e: + # Name mismatch + raise SSLError(e) + + except (HTTPException, SocketError) as e: + # Connection broken, discard. It will be replaced next _get_conn(). 
+ conn = None + # This is necessary so we can access e below + err = e + + finally: + if conn and release_conn: + # Put the connection back to be reused + self._put_conn(conn) + + if not conn: + log.warn("Retrying (%d attempts remain) after connection " + "broken by '%r': %s" % (retries, err, url)) + return self.urlopen(method, url, body, headers, retries - 1, + redirect, assert_same_host) # Try again + + # Handle redirect? + redirect_location = redirect and response.get_redirect_location() + if redirect_location: + log.info("Redirecting %s -> %s" % (url, redirect_location)) + return self.urlopen(method, redirect_location, body, headers, + retries - 1, redirect, assert_same_host) + + return response + + +class HTTPSConnectionPool(HTTPConnectionPool): + """ + Same as :class:`.HTTPConnectionPool`, but HTTPS. + + When Python is compiled with the :mod:`ssl` module, then + :class:`.VerifiedHTTPSConnection` is used, which *can* verify certificates, + instead of :class:httplib.HTTPSConnection`. + + The ``key_file``, ``cert_file``, ``cert_reqs``, and ``ca_certs`` parameters + are only used if :mod:`ssl` is available and are fed into + :meth:`ssl.wrap_socket` to upgrade the connection socket into an SSL socket. + """ + + scheme = 'https' + + def __init__(self, host, port=None, + strict=False, timeout=None, maxsize=1, + block=False, headers=None, + key_file=None, cert_file=None, + cert_reqs='CERT_NONE', ca_certs=None): + + super(HTTPSConnectionPool, self).__init__(host, port, + strict, timeout, maxsize, + block, headers) + self.key_file = key_file + self.cert_file = cert_file + self.cert_reqs = cert_reqs + self.ca_certs = ca_certs + + def _new_conn(self): + """ + Return a fresh :class:`httplib.HTTPSConnection`. + """ + self.num_connections += 1 + log.info("Starting new HTTPS connection (%d): %s" + % (self.num_connections, self.host)) + + if not ssl: # Platform-specific: Python compiled without +ssl + if not HTTPSConnection or HTTPSConnection is object: + raise SSLError("Can't connect to HTTPS URL because the SSL " + "module is not available.") + + return HTTPSConnection(host=self.host, port=self.port) + + connection = VerifiedHTTPSConnection(host=self.host, port=self.port) + connection.set_cert(key_file=self.key_file, cert_file=self.cert_file, + cert_reqs=self.cert_reqs, ca_certs=self.ca_certs) + return connection + + +## Helpers + +def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, + basic_auth=None): + """ + Shortcuts for generating request headers. + + :param keep_alive: + If ``True``, adds 'connection: keep-alive' header. + + :param accept_encoding: + Can be a boolean, list, or string. + ``True`` translates to 'gzip,deflate'. + List will get joined by comma. + String will be used as provided. + + :param user_agent: + String representing the user-agent you want, such as + "python-urllib3/0.6" + + :param basic_auth: + Colon-separated username:password string for 'authorization: basic ...' + auth header. 
+
+    Example: ::
+
+        >>> make_headers(keep_alive=True, user_agent="Batman/1.0")
+        {'connection': 'keep-alive', 'user-agent': 'Batman/1.0'}
+        >>> make_headers(accept_encoding=True)
+        {'accept-encoding': 'gzip,deflate'}
+    """
+    headers = {}
+    if accept_encoding:
+        if isinstance(accept_encoding, str):
+            pass
+        elif isinstance(accept_encoding, list):
+            accept_encoding = ','.join(accept_encoding)
+        else:
+            accept_encoding = 'gzip,deflate'
+        headers['accept-encoding'] = accept_encoding
+
+    if user_agent:
+        headers['user-agent'] = user_agent
+
+    if keep_alive:
+        headers['connection'] = 'keep-alive'
+
+    if basic_auth:
+        headers['authorization'] = 'Basic ' + \
+            b64encode(six.b(basic_auth)).decode('utf-8')
+
+    return headers
+
+
+def get_host(url):
+    """
+    Given a url, return its scheme, host and port (None if it's not there).
+
+    For example: ::
+
+        >>> get_host('http://google.com/mail/')
+        ('http', 'google.com', None)
+        >>> get_host('google.com:80')
+        ('http', 'google.com', 80)
+    """
+
+    # This code is actually similar to urlparse.urlsplit, but much
+    # simplified for our needs.
+    port = None
+    scheme = 'http'
+
+    if '://' in url:
+        scheme, url = url.split('://', 1)
+    if '/' in url:
+        url, _path = url.split('/', 1)
+    if '@' in url:
+        _auth, url = url.split('@', 1)
+    if ':' in url:
+        url, port = url.split(':', 1)
+
+        if not port.isdigit():
+            raise LocationParseError(url)
+
+        port = int(port)
+
+    return scheme, url, port
+
+
+def connection_from_url(url, **kw):
+    """
+    Given a url, return a :class:`.ConnectionPool` instance of its host.
+
+    This is a shortcut for not having to parse out the scheme, host, and port
+    of the url before creating a :class:`.ConnectionPool` instance.
+
+    :param url:
+        Absolute URL string that must include the scheme. Port is optional.
+
+    :param \**kw:
+        Passes additional parameters to the constructor of the appropriate
+        :class:`.ConnectionPool`. Useful for specifying things like
+        timeout, maxsize, headers, etc.
+
+    Example: ::
+
+        >>> conn = connection_from_url('http://google.com/')
+        >>> r = conn.request('GET', '/')
+    """
+    scheme, host, port = get_host(url)
+    if scheme == 'https':
+        return HTTPSConnectionPool(host, port=port, **kw)
+    else:
+        return HTTPConnectionPool(host, port=port, **kw)
+
+
+def is_connection_dropped(conn):
+    """
+    Returns True if the connection is dropped and should be closed.
+
+    :param conn:
+        ``HTTPConnection`` object.
+    """
+    if not poll:  # Platform-specific
+        return select([conn.sock], [], [], 0.0)[0]
+
+    # This version is better on platforms that support it.
+    p = poll()
+    p.register(conn.sock, POLLIN)
+    for (fno, ev) in p.poll(0.0):
+        if fno == conn.sock.fileno():
+            # Either data is buffered (bad), or the connection is dropped.
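+            # In either case the pooled connection is unusable for another
+            # request, so report it as dropped.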
+ return True diff --git a/urllib3/contrib/__init__.py b/urllib3/contrib/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/urllib3/contrib/ntlmpool.py b/urllib3/contrib/ntlmpool.py new file mode 100644 index 0000000..bb41fd1 --- /dev/null +++ b/urllib3/contrib/ntlmpool.py @@ -0,0 +1,120 @@ +# urllib3/contrib/ntlmpool.py +# Copyright 2008-2012 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# +# This module is part of urllib3 and is released under +# the MIT License: http://www.opensource.org/licenses/mit-license.php + +""" +NTLM authenticating pool, contributed by erikcederstran + +Issue #10, see: http://code.google.com/p/urllib3/issues/detail?id=10 +""" + +try: + from http.client import HTTPSConnection +except ImportError: + from httplib import HTTPSConnection +from logging import getLogger +from ntlm import ntlm + +from urllib3 import HTTPSConnectionPool + + +log = getLogger(__name__) + + +class NTLMConnectionPool(HTTPSConnectionPool): + """ + Implements an NTLM authentication version of an urllib3 connection pool + """ + + scheme = 'https' + + def __init__(self, user, pw, authurl, *args, **kwargs): + """ + authurl is a random URL on the server that is protected by NTLM. + user is the Windows user, probably in the DOMAIN\username format. + pw is the password for the user. + """ + super(NTLMConnectionPool, self).__init__(*args, **kwargs) + self.authurl = authurl + self.rawuser = user + user_parts = user.split('\\', 1) + self.domain = user_parts[0].upper() + self.user = user_parts[1] + self.pw = pw + + def _new_conn(self): + # Performs the NTLM handshake that secures the connection. The socket + # must be kept open while requests are performed. + self.num_connections += 1 + log.debug('Starting NTLM HTTPS connection no. %d: https://%s%s' % + (self.num_connections, self.host, self.authurl)) + + headers = {} + headers['Connection'] = 'Keep-Alive' + req_header = 'Authorization' + resp_header = 'www-authenticate' + + conn = HTTPSConnection(host=self.host, port=self.port) + + # Send negotiation message + headers[req_header] = ( + 'NTLM %s' % ntlm.create_NTLM_NEGOTIATE_MESSAGE(self.rawuser)) + log.debug('Request headers: %s' % headers) + conn.request('GET', self.authurl, None, headers) + res = conn.getresponse() + reshdr = dict(res.getheaders()) + log.debug('Response status: %s %s' % (res.status, res.reason)) + log.debug('Response headers: %s' % reshdr) + log.debug('Response data: %s [...]' % res.read(100)) + + # Remove the reference to the socket, so that it can not be closed by + # the response object (we want to keep the socket open) + res.fp = None + + # Server should respond with a challenge message + auth_header_values = reshdr[resp_header].split(', ') + auth_header_value = None + for s in auth_header_values: + if s[:5] == 'NTLM ': + auth_header_value = s[5:] + if auth_header_value is None: + raise Exception('Unexpected %s response header: %s' % + (resp_header, reshdr[resp_header])) + + # Send authentication message + ServerChallenge, NegotiateFlags = \ + ntlm.parse_NTLM_CHALLENGE_MESSAGE(auth_header_value) + auth_msg = ntlm.create_NTLM_AUTHENTICATE_MESSAGE(ServerChallenge, + self.user, + self.domain, + self.pw, + NegotiateFlags) + headers[req_header] = 'NTLM %s' % auth_msg + log.debug('Request headers: %s' % headers) + conn.request('GET', self.authurl, None, headers) + res = conn.getresponse() + log.debug('Response status: %s %s' % (res.status, res.reason)) + log.debug('Response headers: %s' % dict(res.getheaders())) + log.debug('Response data: %s [...]' % 
res.read()[:100]) + if res.status != 200: + if res.status == 401: + raise Exception('Server rejected request: wrong ' + 'username or password') + raise Exception('Wrong server response: %s %s' % + (res.status, res.reason)) + + res.fp = None + log.debug('Connection established') + return conn + + def urlopen(self, method, url, body=None, headers=None, retries=3, + redirect=True, assert_same_host=True): + if headers is None: + headers = {} + headers['Connection'] = 'Keep-Alive' + return super(NTLMConnectionPool, self).urlopen(method, url, body, + headers, retries, + redirect, + assert_same_host) diff --git a/urllib3/exceptions.py b/urllib3/exceptions.py new file mode 100644 index 0000000..15c9699 --- /dev/null +++ b/urllib3/exceptions.py @@ -0,0 +1,67 @@ +# urllib3/exceptions.py +# Copyright 2008-2012 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# +# This module is part of urllib3 and is released under +# the MIT License: http://www.opensource.org/licenses/mit-license.php + + +## Base Exceptions + +class HTTPError(Exception): + "Base exception used by this module." + pass + + +class PoolError(HTTPError): + "Base exception for errors caused within a pool." + def __init__(self, pool, message): + self.pool = pool + HTTPError.__init__(self, "%s: %s" % (pool, message)) + + +class SSLError(HTTPError): + "Raised when SSL certificate fails in an HTTPS connection." + pass + + +## Leaf Exceptions + +class MaxRetryError(PoolError): + "Raised when the maximum number of retries is exceeded." + + def __init__(self, pool, url): + message = "Max retries exceeded with url: %s" % url + PoolError.__init__(self, pool, message) + + self.url = url + + +class HostChangedError(PoolError): + "Raised when an existing pool gets a request for a foreign host." + + def __init__(self, pool, url, retries=3): + message = "Tried to open a foreign host with url: %s" % url + PoolError.__init__(self, pool, message) + + self.url = url + self.retries = retries + + +class TimeoutError(PoolError): + "Raised when a socket timeout occurs." + pass + + +class EmptyPoolError(PoolError): + "Raised when a pool runs out of connections and no more are allowed." + pass + + +class LocationParseError(ValueError, HTTPError): + "Raised when get_host or similar fails to parse the URL input." + + def __init__(self, location): + message = "Failed to parse: %s" % location + super(LocationParseError, self).__init__(self, message) + + self.location = location diff --git a/urllib3/filepost.py b/urllib3/filepost.py new file mode 100644 index 0000000..e1ec8af --- /dev/null +++ b/urllib3/filepost.py @@ -0,0 +1,74 @@ +# urllib3/filepost.py +# Copyright 2008-2012 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# +# This module is part of urllib3 and is released under +# the MIT License: http://www.opensource.org/licenses/mit-license.php + +import codecs +import mimetypes + +try: + from mimetools import choose_boundary +except ImportError: + from .packages.mimetools_choose_boundary import choose_boundary + +from io import BytesIO + +from .packages import six +from .packages.six import b + +writer = codecs.lookup('utf-8')[3] + + +def get_content_type(filename): + return mimetypes.guess_type(filename)[0] or 'application/octet-stream' + + +def encode_multipart_formdata(fields, boundary=None): + """ + Encode a dictionary of ``fields`` using the multipart/form-data mime format. + + :param fields: + Dictionary of fields. The key is treated as the field name, and the + value as the body of the form-data. 
If the value is a tuple of two + elements, then the first element is treated as the filename of the + form-data section. + + :param boundary: + If not specified, then a random boundary will be generated using + :func:`mimetools.choose_boundary`. + """ + body = BytesIO() + if boundary is None: + boundary = choose_boundary() + + for fieldname, value in six.iteritems(fields): + body.write(b('--%s\r\n' % (boundary))) + + if isinstance(value, tuple): + filename, data = value + writer(body).write('Content-Disposition: form-data; name="%s"; ' + 'filename="%s"\r\n' % (fieldname, filename)) + body.write(b('Content-Type: %s\r\n\r\n' % + (get_content_type(filename)))) + else: + data = value + writer(body).write('Content-Disposition: form-data; name="%s"\r\n' + % (fieldname)) + body.write(b'Content-Type: text/plain\r\n\r\n') + + if isinstance(data, int): + data = str(data) # Backwards compatibility + + if isinstance(data, six.text_type): + writer(body).write(data) + else: + body.write(data) + + body.write(b'\r\n') + + body.write(b('--%s--\r\n' % (boundary))) + + content_type = b('multipart/form-data; boundary=%s' % boundary) + + return body.getvalue(), content_type diff --git a/urllib3/packages/__init__.py b/urllib3/packages/__init__.py new file mode 100644 index 0000000..37e8351 --- /dev/null +++ b/urllib3/packages/__init__.py @@ -0,0 +1,4 @@ +from __future__ import absolute_import + +from . import ssl_match_hostname + diff --git a/urllib3/packages/mimetools_choose_boundary/__init__.py b/urllib3/packages/mimetools_choose_boundary/__init__.py new file mode 100644 index 0000000..a0109ab --- /dev/null +++ b/urllib3/packages/mimetools_choose_boundary/__init__.py @@ -0,0 +1,47 @@ +"""The function mimetools.choose_boundary() from Python 2.7, which seems to +have disappeared in Python 3 (although email.generator._make_boundary() might +work as a replacement?). + +Tweaked to use lock from threading rather than thread. +""" +import os +from threading import Lock +_counter_lock = Lock() + +_counter = 0 +def _get_next_counter(): + global _counter + with _counter_lock: + _counter += 1 + return _counter + +_prefix = None + +def choose_boundary(): + """Return a string usable as a multipart boundary. + + The string chosen is unique within a single program run, and + incorporates the user id (if available), process id (if available), + and current time. So it's very unlikely the returned string appears + in message text, but there's no guarantee. + + The boundary contains dots so you have to quote it in the header.""" + + global _prefix + import time + if _prefix is None: + import socket + try: + hostid = socket.gethostbyname(socket.gethostname()) + except socket.gaierror: + hostid = '127.0.0.1' + try: + uid = repr(os.getuid()) + except AttributeError: + uid = '1' + try: + pid = repr(os.getpid()) + except AttributeError: + pid = '1' + _prefix = hostid + '.' + uid + '.' 
+ pid + return "%s.%.3f.%d" % (_prefix, time.time(), _get_next_counter()) diff --git a/urllib3/packages/six.py b/urllib3/packages/six.py new file mode 100644 index 0000000..a64f6fb --- /dev/null +++ b/urllib3/packages/six.py @@ -0,0 +1,372 @@ +"""Utilities for writing code that runs on Python 2 and 3""" + +#Copyright (c) 2010-2011 Benjamin Peterson + +#Permission is hereby granted, free of charge, to any person obtaining a copy of +#this software and associated documentation files (the "Software"), to deal in +#the Software without restriction, including without limitation the rights to +#use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +#the Software, and to permit persons to whom the Software is furnished to do so, +#subject to the following conditions: + +#The above copyright notice and this permission notice shall be included in all +#copies or substantial portions of the Software. + +#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +#IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +#FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +#COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +#IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +#CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +import operator +import sys +import types + +__author__ = "Benjamin Peterson " +__version__ = "1.1.0" + + +# True if we are running on Python 3. +PY3 = sys.version_info[0] == 3 + +if PY3: + string_types = str, + integer_types = int, + class_types = type, + text_type = str + binary_type = bytes + + MAXSIZE = sys.maxsize +else: + string_types = basestring, + integer_types = (int, long) + class_types = (type, types.ClassType) + text_type = unicode + binary_type = str + + # It's possible to have sizeof(long) != sizeof(Py_ssize_t). + class X(object): + def __len__(self): + return 1 << 31 + try: + len(X()) + except OverflowError: + # 32-bit + MAXSIZE = int((1 << 31) - 1) + else: + # 64-bit + MAXSIZE = int((1 << 63) - 1) + del X + + +def _add_doc(func, doc): + """Add documentation to a function.""" + func.__doc__ = doc + + +def _import_module(name): + """Import module, returning the module after the last dot.""" + __import__(name) + return sys.modules[name] + + +class _LazyDescr(object): + + def __init__(self, name): + self.name = name + + def __get__(self, obj, tp): + result = self._resolve() + setattr(obj, self.name, result) + # This is a bit ugly, but it avoids running this again. 
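+        # setattr() above shadows this descriptor on the instance; delattr()
+        # then removes the class-level descriptor so it never runs again.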
+ delattr(tp, self.name) + return result + + +class MovedModule(_LazyDescr): + + def __init__(self, name, old, new=None): + super(MovedModule, self).__init__(name) + if PY3: + if new is None: + new = name + self.mod = new + else: + self.mod = old + + def _resolve(self): + return _import_module(self.mod) + + +class MovedAttribute(_LazyDescr): + + def __init__(self, name, old_mod, new_mod, old_attr=None, new_attr=None): + super(MovedAttribute, self).__init__(name) + if PY3: + if new_mod is None: + new_mod = name + self.mod = new_mod + if new_attr is None: + if old_attr is None: + new_attr = name + else: + new_attr = old_attr + self.attr = new_attr + else: + self.mod = old_mod + if old_attr is None: + old_attr = name + self.attr = old_attr + + def _resolve(self): + module = _import_module(self.mod) + return getattr(module, self.attr) + + + +class _MovedItems(types.ModuleType): + """Lazy loading of moved objects""" + + +_moved_attributes = [ + MovedAttribute("cStringIO", "cStringIO", "io", "StringIO"), + MovedAttribute("filter", "itertools", "builtins", "ifilter", "filter"), + MovedAttribute("map", "itertools", "builtins", "imap", "map"), + MovedAttribute("reload_module", "__builtin__", "imp", "reload"), + MovedAttribute("reduce", "__builtin__", "functools"), + MovedAttribute("StringIO", "StringIO", "io"), + MovedAttribute("xrange", "__builtin__", "builtins", "xrange", "range"), + MovedAttribute("zip", "itertools", "builtins", "izip", "zip"), + + MovedModule("builtins", "__builtin__"), + MovedModule("configparser", "ConfigParser"), + MovedModule("copyreg", "copy_reg"), + MovedModule("http_cookiejar", "cookielib", "http.cookiejar"), + MovedModule("http_cookies", "Cookie", "http.cookies"), + MovedModule("html_entities", "htmlentitydefs", "html.entities"), + MovedModule("html_parser", "HTMLParser", "html.parser"), + MovedModule("http_client", "httplib", "http.client"), + MovedModule("BaseHTTPServer", "BaseHTTPServer", "http.server"), + MovedModule("CGIHTTPServer", "CGIHTTPServer", "http.server"), + MovedModule("SimpleHTTPServer", "SimpleHTTPServer", "http.server"), + MovedModule("cPickle", "cPickle", "pickle"), + MovedModule("queue", "Queue"), + MovedModule("reprlib", "repr"), + MovedModule("socketserver", "SocketServer"), + MovedModule("tkinter", "Tkinter"), + MovedModule("tkinter_dialog", "Dialog", "tkinter.dialog"), + MovedModule("tkinter_filedialog", "FileDialog", "tkinter.filedialog"), + MovedModule("tkinter_scrolledtext", "ScrolledText", "tkinter.scrolledtext"), + MovedModule("tkinter_simpledialog", "SimpleDialog", "tkinter.simpledialog"), + MovedModule("tkinter_tix", "Tix", "tkinter.tix"), + MovedModule("tkinter_constants", "Tkconstants", "tkinter.constants"), + MovedModule("tkinter_dnd", "Tkdnd", "tkinter.dnd"), + MovedModule("tkinter_colorchooser", "tkColorChooser", + "tkinter.colorchooser"), + MovedModule("tkinter_commondialog", "tkCommonDialog", + "tkinter.commondialog"), + MovedModule("tkinter_tkfiledialog", "tkFileDialog", "tkinter.filedialog"), + MovedModule("tkinter_font", "tkFont", "tkinter.font"), + MovedModule("tkinter_messagebox", "tkMessageBox", "tkinter.messagebox"), + MovedModule("tkinter_tksimpledialog", "tkSimpleDialog", + "tkinter.simpledialog"), + MovedModule("urllib_robotparser", "robotparser", "urllib.robotparser"), + MovedModule("winreg", "_winreg"), +] +for attr in _moved_attributes: + setattr(_MovedItems, attr.name, attr) +del attr + +moves = sys.modules["six.moves"] = _MovedItems("moves") + + +def add_move(move): + """Add an item to six.moves.""" + 
setattr(_MovedItems, move.name, move) + + +def remove_move(name): + """Remove item from six.moves.""" + try: + delattr(_MovedItems, name) + except AttributeError: + try: + del moves.__dict__[name] + except KeyError: + raise AttributeError("no such move, %r" % (name,)) + + +if PY3: + _meth_func = "__func__" + _meth_self = "__self__" + + _func_code = "__code__" + _func_defaults = "__defaults__" + + _iterkeys = "keys" + _itervalues = "values" + _iteritems = "items" +else: + _meth_func = "im_func" + _meth_self = "im_self" + + _func_code = "func_code" + _func_defaults = "func_defaults" + + _iterkeys = "iterkeys" + _itervalues = "itervalues" + _iteritems = "iteritems" + + +if PY3: + def get_unbound_function(unbound): + return unbound + + + advance_iterator = next + + def callable(obj): + return any("__call__" in klass.__dict__ for klass in type(obj).__mro__) +else: + def get_unbound_function(unbound): + return unbound.im_func + + + def advance_iterator(it): + return it.next() + + callable = callable +_add_doc(get_unbound_function, + """Get the function out of a possibly unbound function""") + + +get_method_function = operator.attrgetter(_meth_func) +get_method_self = operator.attrgetter(_meth_self) +get_function_code = operator.attrgetter(_func_code) +get_function_defaults = operator.attrgetter(_func_defaults) + + +def iterkeys(d): + """Return an iterator over the keys of a dictionary.""" + return getattr(d, _iterkeys)() + +def itervalues(d): + """Return an iterator over the values of a dictionary.""" + return getattr(d, _itervalues)() + +def iteritems(d): + """Return an iterator over the (key, value) pairs of a dictionary.""" + return getattr(d, _iteritems)() + + +if PY3: + def b(s): + return s.encode("latin-1") + def u(s): + return s + if sys.version_info[1] <= 1: + def int2byte(i): + return bytes((i,)) + else: + # This is about 2x faster than the implementation above on 3.2+ + int2byte = operator.methodcaller("to_bytes", 1, "big") + import io + StringIO = io.StringIO + BytesIO = io.BytesIO +else: + def b(s): + return s + def u(s): + return unicode(s, "unicode_escape") + int2byte = chr + import StringIO + StringIO = BytesIO = StringIO.StringIO +_add_doc(b, """Byte literal""") +_add_doc(u, """Text literal""") + + +if PY3: + import builtins + exec_ = getattr(builtins, "exec") + + + def reraise(tp, value, tb=None): + if value.__traceback__ is not tb: + raise value.with_traceback(tb) + raise value + + + print_ = getattr(builtins, "print") + del builtins + +else: + def exec_(code, globs=None, locs=None): + """Execute code in a namespace.""" + if globs is None: + frame = sys._getframe(1) + globs = frame.f_globals + if locs is None: + locs = frame.f_locals + del frame + elif locs is None: + locs = globs + exec("""exec code in globs, locs""") + + + exec_("""def reraise(tp, value, tb=None): + raise tp, value, tb +""") + + + def print_(*args, **kwargs): + """The new-style print function.""" + fp = kwargs.pop("file", sys.stdout) + if fp is None: + return + def write(data): + if not isinstance(data, basestring): + data = str(data) + fp.write(data) + want_unicode = False + sep = kwargs.pop("sep", None) + if sep is not None: + if isinstance(sep, unicode): + want_unicode = True + elif not isinstance(sep, str): + raise TypeError("sep must be None or a string") + end = kwargs.pop("end", None) + if end is not None: + if isinstance(end, unicode): + want_unicode = True + elif not isinstance(end, str): + raise TypeError("end must be None or a string") + if kwargs: + raise TypeError("invalid keyword arguments to 
print()") + if not want_unicode: + for arg in args: + if isinstance(arg, unicode): + want_unicode = True + break + if want_unicode: + newline = unicode("\n") + space = unicode(" ") + else: + newline = "\n" + space = " " + if sep is None: + sep = space + if end is None: + end = newline + for i, arg in enumerate(args): + if i: + write(sep) + write(arg) + write(end) + +_add_doc(reraise, """Reraise an exception.""") + + +def with_metaclass(meta, base=object): + """Create a base class with a metaclass.""" + return meta("NewBase", (base,), {}) diff --git a/urllib3/packages/ssl_match_hostname/__init__.py b/urllib3/packages/ssl_match_hostname/__init__.py new file mode 100644 index 0000000..9560b04 --- /dev/null +++ b/urllib3/packages/ssl_match_hostname/__init__.py @@ -0,0 +1,61 @@ +"""The match_hostname() function from Python 3.2, essential when using SSL.""" + +import re + +__version__ = '3.2.2' + +class CertificateError(ValueError): + pass + +def _dnsname_to_pat(dn): + pats = [] + for frag in dn.split(r'.'): + if frag == '*': + # When '*' is a fragment by itself, it matches a non-empty dotless + # fragment. + pats.append('[^.]+') + else: + # Otherwise, '*' matches any dotless fragment. + frag = re.escape(frag) + pats.append(frag.replace(r'\*', '[^.]*')) + return re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE) + +def match_hostname(cert, hostname): + """Verify that *cert* (in decoded format as returned by + SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 rules + are mostly followed, but IP addresses are not accepted for *hostname*. + + CertificateError is raised on failure. On success, the function + returns nothing. + """ + if not cert: + raise ValueError("empty or no certificate") + dnsnames = [] + san = cert.get('subjectAltName', ()) + for key, value in san: + if key == 'DNS': + if _dnsname_to_pat(value).match(hostname): + return + dnsnames.append(value) + if not dnsnames: + # The subject is only checked when there is no dNSName entry + # in subjectAltName + for sub in cert.get('subject', ()): + for key, value in sub: + # XXX according to RFC 2818, the most specific Common Name + # must be used. 
+ if key == 'commonName': + if _dnsname_to_pat(value).match(hostname): + return + dnsnames.append(value) + if len(dnsnames) > 1: + raise CertificateError("hostname %r " + "doesn't match either of %s" + % (hostname, ', '.join(map(repr, dnsnames)))) + elif len(dnsnames) == 1: + raise CertificateError("hostname %r " + "doesn't match %r" + % (hostname, dnsnames[0])) + else: + raise CertificateError("no appropriate commonName or " + "subjectAltName fields were found") diff --git a/urllib3/poolmanager.py b/urllib3/poolmanager.py new file mode 100644 index 0000000..d42f35b --- /dev/null +++ b/urllib3/poolmanager.py @@ -0,0 +1,138 @@ +# urllib3/poolmanager.py +# Copyright 2008-2012 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# +# This module is part of urllib3 and is released under +# the MIT License: http://www.opensource.org/licenses/mit-license.php + +import logging + +from ._collections import RecentlyUsedContainer +from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool +from .connectionpool import get_host, connection_from_url, port_by_scheme +from .exceptions import HostChangedError +from .request import RequestMethods + + +__all__ = ['PoolManager', 'ProxyManager', 'proxy_from_url'] + + +pool_classes_by_scheme = { + 'http': HTTPConnectionPool, + 'https': HTTPSConnectionPool, +} + +log = logging.getLogger(__name__) + + +class PoolManager(RequestMethods): + """ + Allows for arbitrary requests while transparently keeping track of + necessary connection pools for you. + + :param num_pools: + Number of connection pools to cache before discarding the least recently + used pool. + + :param \**connection_pool_kw: + Additional parameters are used to create fresh + :class:`urllib3.connectionpool.ConnectionPool` instances. + + Example: :: + + >>> manager = PoolManager() + >>> r = manager.urlopen("http://google.com/") + >>> r = manager.urlopen("http://google.com/mail") + >>> r = manager.urlopen("http://yahoo.com/") + >>> len(r.pools) + 2 + + """ + + # TODO: Make sure there are no memory leaks here. + + def __init__(self, num_pools=10, **connection_pool_kw): + self.connection_pool_kw = connection_pool_kw + self.pools = RecentlyUsedContainer(num_pools) + + def connection_from_host(self, host, port=80, scheme='http'): + """ + Get a :class:`ConnectionPool` based on the host, port, and scheme. + + Note that an appropriate ``port`` value is required here to normalize + connection pools in our container most effectively. + """ + pool_key = (scheme, host, port) + + # If the scheme, host, or port doesn't match existing open connections, + # open a new ConnectionPool. + pool = self.pools.get(pool_key) + if pool: + return pool + + # Make a fresh ConnectionPool of the desired type + pool_cls = pool_classes_by_scheme[scheme] + pool = pool_cls(host, port, **self.connection_pool_kw) + + self.pools[pool_key] = pool + + return pool + + def connection_from_url(self, url): + """ + Similar to :func:`urllib3.connectionpool.connection_from_url` but + doesn't pass any additional parameters to the + :class:`urllib3.connectionpool.ConnectionPool` constructor. + + Additional parameters are taken from the :class:`.PoolManager` + constructor. + """ + scheme, host, port = get_host(url) + + port = port or port_by_scheme.get(scheme, 80) + + return self.connection_from_host(host, port=port, scheme=scheme) + + def urlopen(self, method, url, **kw): + """ + Same as :meth:`urllib3.connectionpool.HTTPConnectionPool.urlopen`. 
+
+        ``url`` must be absolute, such that an appropriate
+        :class:`urllib3.connectionpool.ConnectionPool` can be chosen for it.
+        """
+        conn = self.connection_from_url(url)
+        try:
+            return conn.urlopen(method, url, **kw)
+
+        except HostChangedError as e:
+            kw['retries'] = e.retries  # Persist retries countdown
+            return self.urlopen(method, e.url, **kw)
+
+
+class ProxyManager(RequestMethods):
+    """
+    Given a ConnectionPool to a proxy, the ProxyManager's ``urlopen`` method
+    will make requests to any url through the defined proxy.
+    """
+
+    def __init__(self, proxy_pool):
+        self.proxy_pool = proxy_pool
+
+    def _set_proxy_headers(self, headers=None):
+        headers = headers or {}
+
+        # Same headers as curl passes for --proxy1.0
+        headers['Accept'] = '*/*'
+        headers['Proxy-Connection'] = 'Keep-Alive'
+
+        return headers
+
+    def urlopen(self, method, url, **kw):
+        "Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute."
+        kw['assert_same_host'] = False
+        kw['headers'] = self._set_proxy_headers(kw.get('headers'))
+        return self.proxy_pool.urlopen(method, url, **kw)
+
+
+def proxy_from_url(url, **pool_kw):
+    proxy_pool = connection_from_url(url, **pool_kw)
+    return ProxyManager(proxy_pool)
diff --git a/urllib3/request.py b/urllib3/request.py
new file mode 100644
index 0000000..5ea26a0
--- /dev/null
+++ b/urllib3/request.py
@@ -0,0 +1,147 @@
+# urllib3/request.py
+# Copyright 2008-2012 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
+#
+# This module is part of urllib3 and is released under
+# the MIT License: http://www.opensource.org/licenses/mit-license.php
+
+try:
+    from urllib.parse import urlencode
+except ImportError:
+    from urllib import urlencode
+
+from .filepost import encode_multipart_formdata
+
+
+__all__ = ['RequestMethods']
+
+
+class RequestMethods(object):
+    """
+    Convenience mixin for classes that implement a :meth:`urlopen` method, such
+    as :class:`~urllib3.connectionpool.HTTPConnectionPool` and
+    :class:`~urllib3.poolmanager.PoolManager`.
+
+    Provides behavior for making common types of HTTP request methods and
+    decides which type of request field encoding to use.
+
+    Specifically,
+
+    :meth:`.request_encode_url` is for sending requests whose fields are
+    encoded in the URL (such as GET, HEAD, DELETE).
+
+    :meth:`.request_encode_body` is for sending requests whose fields are
+    encoded in the *body* of the request using multipart or
+    www-form-urlencoded (such as for POST, PUT, PATCH).
+
+    :meth:`.request` is for making any kind of request; it will look up the
+    appropriate encoding format and use one of the above two methods to make
+    the request.
+    """
+
+    _encode_url_methods = set(['DELETE', 'GET', 'HEAD', 'OPTIONS'])
+
+    _encode_body_methods = set(['PATCH', 'POST', 'PUT', 'TRACE'])
+
+    def urlopen(self, method, url, body=None, headers=None,
+                encode_multipart=True, multipart_boundary=None,
+                **kw):
+        raise NotImplementedError("Classes extending RequestMethods must "
+                                  "implement their own ``urlopen`` method.")
+
+    def request(self, method, url, fields=None, headers=None, **urlopen_kw):
+        """
+        Make a request using :meth:`urlopen` with the appropriate encoding of
+        ``fields`` based on the ``method`` used.
+
+        This is a convenience method that requires the least amount of manual
+        effort. It can be used in most situations, while still having the
+        option to drop down to more specific methods when necessary, such as
+        :meth:`request_encode_url`, :meth:`request_encode_body`,
+        or even the lowest level :meth:`urlopen`.
+ """ + method = method.upper() + + if method in self._encode_url_methods: + return self.request_encode_url(method, url, fields=fields, + headers=headers, + **urlopen_kw) + else: + return self.request_encode_body(method, url, fields=fields, + headers=headers, + **urlopen_kw) + + def request_encode_url(self, method, url, fields=None, **urlopen_kw): + """ + Make a request using :meth:`urlopen` with the ``fields`` encoded in + the url. This is useful for request methods like GET, HEAD, DELETE, etc. + """ + if fields: + url += '?' + urlencode(fields) + return self.urlopen(method, url, **urlopen_kw) + + def request_encode_body(self, method, url, fields=None, headers=None, + encode_multipart=True, multipart_boundary=None, + **urlopen_kw): + """ + Make a request using :meth:`urlopen` with the ``fields`` encoded in + the body. This is useful for request methods like POST, PUT, PATCH, etc. + + When ``encode_multipart=True`` (default), then + :meth:`urllib3.filepost.encode_multipart_formdata` is used to encode the + payload with the appropriate content type. Otherwise + :meth:`urllib.urlencode` is used with the + 'application/x-www-form-urlencoded' content type. + + Multipart encoding must be used when posting files, and it's reasonably + safe to use it in other times too. However, it may break request signing, + such as with OAuth. + + Supports an optional ``fields`` parameter of key/value strings AND + key/filetuple. A filetuple is a (filename, data) tuple. For example: :: + + fields = { + 'foo': 'bar', + 'fakefile': ('foofile.txt', 'contents of foofile'), + 'realfile': ('barfile.txt', open('realfile').read()), + 'nonamefile': ('contents of nonamefile field'), + } + + When uploading a file, providing a filename (the first parameter of the + tuple) is optional but recommended to best mimick behavior of browsers. + + Note that if ``headers`` are supplied, the 'Content-Type' header will be + overwritten because it depends on the dynamic random boundary string + which is used to compose the body of the request. The random boundary + string can be explicitly set with the ``multipart_boundary`` parameter. + """ + if encode_multipart: + body, content_type = encode_multipart_formdata(fields or {}, + boundary=multipart_boundary) + else: + body, content_type = (urlencode(fields or {}), + 'application/x-www-form-urlencoded') + + headers = headers or {} + headers.update({'Content-Type': content_type}) + + return self.urlopen(method, url, body=body, headers=headers, + **urlopen_kw) + + # Deprecated: + + def get_url(self, url, fields=None, **urlopen_kw): + """ + .. deprecated:: 1.0 + Use :meth:`request` instead. + """ + return self.request_encode_url('GET', url, fields=fields, + **urlopen_kw) + + def post_url(self, url, fields=None, headers=None, **urlopen_kw): + """ + .. deprecated:: 1.0 + Use :meth:`request` instead. 
+        """
+        return self.request_encode_body('POST', url, fields=fields,
+                                        headers=headers,
+                                        **urlopen_kw)
diff --git a/urllib3/response.py b/urllib3/response.py
new file mode 100644
index 0000000..4dd431e
--- /dev/null
+++ b/urllib3/response.py
@@ -0,0 +1,191 @@
+# urllib3/response.py
+# Copyright 2008-2012 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
+#
+# This module is part of urllib3 and is released under
+# the MIT License: http://www.opensource.org/licenses/mit-license.php
+
+import gzip
+import logging
+import zlib
+
+from io import BytesIO
+
+from .exceptions import HTTPError
+from .packages.six import string_types as basestring
+
+
+log = logging.getLogger(__name__)
+
+
+def decode_gzip(data):
+    gzipper = gzip.GzipFile(fileobj=BytesIO(data))
+    return gzipper.read()
+
+
+def decode_deflate(data):
+    try:
+        return zlib.decompress(data)
+    except zlib.error:
+        return zlib.decompress(data, -zlib.MAX_WBITS)
+
+
+class HTTPResponse(object):
+    """
+    HTTP Response container.
+
+    Backwards-compatible with httplib's HTTPResponse, but the response ``body``
+    is loaded and decoded on demand when the ``data`` property is accessed.
+
+    Extra parameters for behaviour not present in httplib.HTTPResponse:
+
+    :param preload_content:
+        If True, the response's body will be preloaded during construction.
+
+    :param decode_content:
+        If False, attempts to decode specific content-encodings based on
+        headers (like 'gzip' and 'deflate') will be skipped and raw data will
+        be used instead.
+
+    :param original_response:
+        When this HTTPResponse wrapper is generated from an httplib.HTTPResponse
+        object, it's convenient to include the original for debug purposes. It's
+        otherwise unused.
+    """
+
+    CONTENT_DECODERS = {
+        'gzip': decode_gzip,
+        'deflate': decode_deflate,
+    }
+
+    def __init__(self, body='', headers=None, status=0, version=0, reason=None,
+                 strict=0, preload_content=True, decode_content=True,
+                 original_response=None, pool=None, connection=None):
+        self.headers = headers or {}
+        self.status = status
+        self.version = version
+        self.reason = reason
+        self.strict = strict
+
+        self._decode_content = decode_content
+        self._body = body if body and isinstance(body, basestring) else None
+        self._fp = None
+        self._original_response = original_response
+
+        self._pool = pool
+        self._connection = connection
+
+        if hasattr(body, 'read'):
+            self._fp = body
+
+        if preload_content and not self._body:
+            self._body = self.read(decode_content=decode_content)
+
+    def get_redirect_location(self):
+        """
+        Should we redirect and where to?
+
+        :returns: Truthy redirect location string if we got a redirect status
+            code and valid location. ``None`` if redirect status and no
+            location. ``False`` if not a redirect status code.
+        """
+        if self.status in [301, 302, 303, 307]:
+            return self.headers.get('location')
+
+        return False
+
+    def release_conn(self):
+        if not self._pool or not self._connection:
+            return
+
+        self._pool._put_conn(self._connection)
+        self._connection = None
+
+    @property
+    def data(self):
+        # For backwards-compat with urllib3 0.4 and earlier.
+        if self._body:
+            return self._body
+
+        if self._fp:
+            return self.read(cache_content=True)
+
+    def read(self, amt=None, decode_content=None, cache_content=False):
+        """
+        Similar to :meth:`httplib.HTTPResponse.read`, but with two additional
+        parameters: ``decode_content`` and ``cache_content``.
+
+        :param amt:
+            How much of the content to read.
+            If specified, decoding and caching is skipped because we can't
+            decode partial content, nor does it make sense to cache partial
+            content as the full response.
+
+        :param decode_content:
+            If True, will attempt to decode the body based on the
+            'content-encoding' header. (Overridden if ``amt`` is set.)
+
+        :param cache_content:
+            If True, will save the returned data such that the same result is
+            returned regardless of the state of the underlying file object.
+            This is useful if you want the ``.data`` property to continue
+            working after having ``.read()`` the file object. (Overridden if
+            ``amt`` is set.)
+        """
+        content_encoding = self.headers.get('content-encoding')
+        decoder = self.CONTENT_DECODERS.get(content_encoding)
+        if decode_content is None:
+            decode_content = self._decode_content
+
+        if self._fp is None:
+            return
+
+        try:
+            if amt is None:
+                # cStringIO doesn't like amt=None
+                data = self._fp.read()
+            else:
+                return self._fp.read(amt)
+
+            try:
+                if decode_content and decoder:
+                    data = decoder(data)
+            except IOError:
+                raise HTTPError("Received response with content-encoding: %s, but "
+                                "failed to decode it." % content_encoding)
+
+            if cache_content:
+                self._body = data
+
+            return data
+
+        finally:
+            if self._original_response and self._original_response.isclosed():
+                self.release_conn()
+
+    @classmethod
+    def from_httplib(ResponseCls, r, **response_kw):
+        """
+        Given an :class:`httplib.HTTPResponse` instance ``r``, return a
+        corresponding :class:`urllib3.response.HTTPResponse` object.
+
+        Remaining parameters are passed to the HTTPResponse constructor, along
+        with ``original_response=r``.
+        """
+
+        # HTTPResponse objects in Python 3 don't have a .strict attribute
+        strict = getattr(r, 'strict', 0)
+        return ResponseCls(body=r,
+                           # In Python 3, the header keys are returned capitalised
+                           headers=dict((k.lower(), v) for k, v in r.getheaders()),
+                           status=r.status,
+                           version=r.version,
+                           reason=r.reason,
+                           strict=strict,
+                           original_response=r,
+                           **response_kw)
+
+    # Backwards-compatibility methods for httplib.HTTPResponse
+    def getheaders(self):
+        return self.headers
+
+    def getheader(self, name, default=None):
+        return self.headers.get(name, default)
-- 
cgit v1.2.3

From 77245469d4fbd400c6702cde35f9d9002540663e Mon Sep 17 00:00:00 2001
From: SVN-Git Migration
Date: Thu, 8 Oct 2015 13:19:31 -0700
Subject: Imported Upstream version 1.3

---
 CHANGES.rst                  |  18 ++++++
 PKG-INFO                     |  21 ++++++-
 README.rst                   |   1 +
 dummyserver/server.py        |   4 +-
 test/test_collections.py     |   1 +
 test/test_connectionpool.py  |  10 +---
 test/test_filepost.py        |  85 +++++++++++++++++++++++++++
 urllib3.egg-info/PKG-INFO    |  21 ++++++-
 urllib3.egg-info/SOURCES.txt |   2 +
 urllib3/__init__.py          |  18 ++----
 urllib3/connectionpool.py    | 134 +++++------------------------------
 urllib3/filepost.py          |  24 ++++++--
 urllib3/poolmanager.py       |   4 +-
 urllib3/request.py           |  21 +------
 urllib3/response.py          |  15 ++++-
 urllib3/util.py              | 136 +++++++++++++++++++++++++++++++++++++++++++
 16 files changed, 342 insertions(+), 173 deletions(-)
 create mode 100644 test/test_filepost.py
 create mode 100644 urllib3/util.py

diff --git a/CHANGES.rst b/CHANGES.rst
index d998db8..5afdd62 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -1,6 +1,24 @@
 Changes
 =======
 
+1.3 (2012-03-25)
+++++++++++++++++
+
+* Removed pre-1.0 deprecated API.
+
+* Refactored helpers into a ``urllib3.util`` submodule.
+
+* Fixed multipart encoding to support list-of-tuples for keys with multiple
+  values.
(Issue #48) + +* Fixed multiple Set-Cookie headers in response not getting merged properly in + Python 3. (Issue #53) + +* AppEngine support with Py27. (Issue #61) + +* Minor ``encode_multipart_formdata`` fixes related to Python 3 strings vs + bytes. + 1.2.2 (2012-02-06) ++++++++++++++++++ diff --git a/PKG-INFO b/PKG-INFO index 2f0ab55..17d8a02 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 1.0 Name: urllib3 -Version: 1.2.2 +Version: 1.3 Summary: HTTP library with thread-safe connection pooling, file post, and more. Home-page: http://urllib3.readthedocs.org/ Author: Andrey Petrov @@ -16,6 +16,7 @@ Description: Highlights - Built-in redirection and retries (optional). - Supports gzip and deflate decoding. - Thread-safe and sanity-safe. + - Works with AppEngine, gevent, and eventlib. - Tested on Python 2.6+ and Python 3.2+, 99% unit test coverage. - Small and easy to understand codebase perfect for extending and building upon. For a more comprehensive solution, have a look at @@ -108,6 +109,24 @@ Description: Highlights Changes ======= + 1.3 (2012-03-25) + ++++++++++++++++ + + * Removed pre-1.0 deprecated API. + + * Refactored helpers into a ``urllib3.util`` submodule. + + * Fixed multipart encoding to support list-of-tuples for keys with multiple + values. (Issue #48) + + * Fixed multiple Set-Cookie headers in response not getting merged properly in + Python 3. (Issue #53) + + * AppEngine support with Py27. (Issue #61) + + * Minor ``encode_multipart_formdata`` fixes related to Python 3 strings vs + bytes. + 1.2.2 (2012-02-06) ++++++++++++++++++ diff --git a/README.rst b/README.rst index b376c81..f177852 100644 --- a/README.rst +++ b/README.rst @@ -8,6 +8,7 @@ Highlights - Built-in redirection and retries (optional). - Supports gzip and deflate decoding. - Thread-safe and sanity-safe. +- Works with AppEngine, gevent, and eventlib. - Tested on Python 2.6+ and Python 3.2+, 99% unit test coverage. - Small and easy to understand codebase perfect for extending and building upon. 
For a more comprehensive solution, have a look at diff --git a/dummyserver/server.py b/dummyserver/server.py index 529850f..6c0943c 100755 --- a/dummyserver/server.py +++ b/dummyserver/server.py @@ -83,7 +83,7 @@ class TornadoServerThread(threading.Thread): else: http_server = tornado.httpserver.HTTPServer(container) - http_server.listen(self.port) + http_server.listen(self.port, address=self.host) return http_server def run(self): @@ -106,7 +106,7 @@ if __name__ == '__main__': if len(sys.argv) > 1: url = sys.argv[1] - print("Starting WGI server at: %s" % url) + print("Starting WSGI server at: %s" % url) scheme, host, port = get_host(url) t = TornadoServerThread(scheme=scheme, host=host, port=port) diff --git a/test/test_collections.py b/test/test_collections.py index f8275e0..6cb5aca 100644 --- a/test/test_collections.py +++ b/test/test_collections.py @@ -4,6 +4,7 @@ from urllib3._collections import RecentlyUsedContainer as Container from urllib3.packages import six xrange = six.moves.xrange + class TestLRUContainer(unittest.TestCase): def test_maxsize(self): d = Container(5) diff --git a/test/test_connectionpool.py b/test/test_connectionpool.py index 4281d42..c32c6dc 100644 --- a/test/test_connectionpool.py +++ b/test/test_connectionpool.py @@ -1,11 +1,7 @@ import unittest -from urllib3.connectionpool import ( - connection_from_url, - get_host, - HTTPConnectionPool, - make_headers) - +from urllib3.connectionpool import connection_from_url, HTTPConnectionPool +from urllib3.util import get_host, make_headers from urllib3.exceptions import EmptyPoolError, LocationParseError @@ -105,7 +101,7 @@ class TestConnectionPool(unittest.TestCase): pass try: - pool.get_url('/', pool_timeout=0.01) + pool.request('GET', '/', pool_timeout=0.01) self.fail("Managed to get a connection without EmptyPoolError") except EmptyPoolError: pass diff --git a/test/test_filepost.py b/test/test_filepost.py new file mode 100644 index 0000000..c251778 --- /dev/null +++ b/test/test_filepost.py @@ -0,0 +1,85 @@ +import unittest + +from urllib3.filepost import encode_multipart_formdata, iter_fields +from urllib3.packages.six import b, u + + +BOUNDARY = '!! test boundary !!' 
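+
+# Editorial aside, not part of the upstream test file: a brief sketch of the
+# API under test, assuming urllib3 1.3 as imported above. Uncommented, this
+# runs as-is:
+#
+#     body, content_type = encode_multipart_formdata(
+#         {'k': 'v', 'f': ('name.txt', b'data')},  # dict or list of tuples
+#         boundary=BOUNDARY)
+#     assert content_type == b'multipart/form-data; boundary=' + b(BOUNDARY)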
+ + +class TestIterfields(unittest.TestCase): + + def test_dict(self): + for fieldname, value in iter_fields(dict(a='b')): + self.assertEqual((fieldname, value), ('a', 'b')) + + self.assertEqual( + list(sorted(iter_fields(dict(a='b', c='d')))), + [('a', 'b'), ('c', 'd')]) + + def test_tuple_list(self): + for fieldname, value in iter_fields([('a', 'b')]): + self.assertEqual((fieldname, value), ('a', 'b')) + + self.assertEqual( + list(iter_fields([('a', 'b'), ('c', 'd')])), + [('a', 'b'), ('c', 'd')]) + + +class TestMultipartEncoding(unittest.TestCase): + + def test_input_datastructures(self): + fieldsets = [ + dict(k='v', k2='v2'), + [('k', 'v'), ('k2', 'v2')], + ] + + for fields in fieldsets: + encoded, _ = encode_multipart_formdata(fields, boundary=BOUNDARY) + self.assertEqual(encoded.count(b(BOUNDARY)), 3) + + + def test_field_encoding(self): + fieldsets = [ + [('k', 'v'), ('k2', 'v2')], + [('k', b'v'), (u('k2'), b'v2')], + [('k', b'v'), (u('k2'), 'v2')], + ] + + for fields in fieldsets: + encoded, content_type = encode_multipart_formdata(fields, boundary=BOUNDARY) + + self.assertEqual(encoded, + b'--' + b(BOUNDARY) + b'\r\n' + b'Content-Disposition: form-data; name="k"\r\n' + b'Content-Type: text/plain\r\n' + b'\r\n' + b'v\r\n' + b'--' + b(BOUNDARY) + b'\r\n' + b'Content-Disposition: form-data; name="k2"\r\n' + b'Content-Type: text/plain\r\n' + b'\r\n' + b'v2\r\n' + b'--' + b(BOUNDARY) + b'--\r\n' + , fields) + + self.assertEqual(content_type, + b'multipart/form-data; boundary=' + b(BOUNDARY)) + + + def test_filename(self): + fields = [('k', ('somename', b'v'))] + + encoded, content_type = encode_multipart_formdata(fields, boundary=BOUNDARY) + + self.assertEqual(encoded, + b'--' + b(BOUNDARY) + b'\r\n' + b'Content-Disposition: form-data; name="k"; filename="somename"\r\n' + b'Content-Type: application/octet-stream\r\n' + b'\r\n' + b'v\r\n' + b'--' + b(BOUNDARY) + b'--\r\n' + ) + + self.assertEqual(content_type, + b'multipart/form-data; boundary=' + b(BOUNDARY)) diff --git a/urllib3.egg-info/PKG-INFO b/urllib3.egg-info/PKG-INFO index 2f0ab55..17d8a02 100644 --- a/urllib3.egg-info/PKG-INFO +++ b/urllib3.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 1.0 Name: urllib3 -Version: 1.2.2 +Version: 1.3 Summary: HTTP library with thread-safe connection pooling, file post, and more. Home-page: http://urllib3.readthedocs.org/ Author: Andrey Petrov @@ -16,6 +16,7 @@ Description: Highlights - Built-in redirection and retries (optional). - Supports gzip and deflate decoding. - Thread-safe and sanity-safe. + - Works with AppEngine, gevent, and eventlib. - Tested on Python 2.6+ and Python 3.2+, 99% unit test coverage. - Small and easy to understand codebase perfect for extending and building upon. For a more comprehensive solution, have a look at @@ -108,6 +109,24 @@ Description: Highlights Changes ======= + 1.3 (2012-03-25) + ++++++++++++++++ + + * Removed pre-1.0 deprecated API. + + * Refactored helpers into a ``urllib3.util`` submodule. + + * Fixed multipart encoding to support list-of-tuples for keys with multiple + values. (Issue #48) + + * Fixed multiple Set-Cookie headers in response not getting merged properly in + Python 3. (Issue #53) + + * AppEngine support with Py27. (Issue #61) + + * Minor ``encode_multipart_formdata`` fixes related to Python 3 strings vs + bytes. 
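+
+        An editorial sketch, not part of the upstream changelog: the
+        Set-Cookie fix above merges repeating header keys the same way
+        ``HTTPResponse.from_httplib`` does in this release::
+
+            headers = {}
+            for k, v in [('set-cookie', 'a=1'), ('set-cookie', 'b=2')]:
+                if k in headers:
+                    v = ', '.join([headers[k], v])
+                headers[k] = v
+            assert headers == {'set-cookie': 'a=1, b=2'}
+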
+ 1.2.2 (2012-02-06) ++++++++++++++++++ diff --git a/urllib3.egg-info/SOURCES.txt b/urllib3.egg-info/SOURCES.txt index d7cbb3d..d79710b 100644 --- a/urllib3.egg-info/SOURCES.txt +++ b/urllib3.egg-info/SOURCES.txt @@ -14,6 +14,7 @@ test/__init__.py test/benchmark.py test/test_collections.py test/test_connectionpool.py +test/test_filepost.py test/test_poolmanager.py test/test_response.py urllib3/__init__.py @@ -24,6 +25,7 @@ urllib3/filepost.py urllib3/poolmanager.py urllib3/request.py urllib3/response.py +urllib3/util.py urllib3.egg-info/PKG-INFO urllib3.egg-info/SOURCES.txt urllib3.egg-info/dependency_links.txt diff --git a/urllib3/__init__.py b/urllib3/__init__.py index 2e9c663..2d6fece 100644 --- a/urllib3/__init__.py +++ b/urllib3/__init__.py @@ -10,26 +10,20 @@ urllib3 - Thread-safe connection pooling and re-using. __author__ = 'Andrey Petrov (andrey.petrov@shazow.net)' __license__ = 'MIT' -__version__ = '1.2.2' +__version__ = '1.3' from .connectionpool import ( HTTPConnectionPool, HTTPSConnectionPool, - connection_from_url, - get_host, - make_headers) - - -from .exceptions import ( - HTTPError, - MaxRetryError, - SSLError, - TimeoutError) + connection_from_url +) +from . import exceptions +from .filepost import encode_multipart_formdata from .poolmanager import PoolManager, ProxyManager, proxy_from_url from .response import HTTPResponse -from .filepost import encode_multipart_formdata +from .util import make_headers, get_host # Set default logging handler to avoid "No handler found" warnings. diff --git a/urllib3/connectionpool.py b/urllib3/connectionpool.py index 39e652e..c3cb3b1 100644 --- a/urllib3/connectionpool.py +++ b/urllib3/connectionpool.py @@ -7,15 +7,8 @@ import logging import socket -from base64 import b64encode from socket import error as SocketError, timeout as SocketTimeout -try: - from select import poll, POLLIN -except ImportError: # Doesn't exist on OSX and other platforms - from select import select - poll = False - try: # Python 3 from http.client import HTTPConnection, HTTPException from http.client import HTTP_PORT, HTTPS_PORT @@ -42,17 +35,16 @@ try: # Compiled with SSL? 
import ssl BaseSSLError = ssl.SSLError -except ImportError: +except (ImportError, AttributeError): pass -from .packages.ssl_match_hostname import match_hostname, CertificateError from .request import RequestMethods from .response import HTTPResponse +from .util import get_host, is_connection_dropped from .exceptions import ( EmptyPoolError, HostChangedError, - LocationParseError, MaxRetryError, SSLError, TimeoutError, @@ -61,6 +53,7 @@ from .exceptions import ( from .packages.ssl_match_hostname import match_hostname, CertificateError from .packages import six + xrange = six.moves.xrange log = logging.getLogger(__name__) @@ -72,6 +65,7 @@ port_by_scheme = { 'https': HTTPS_PORT, } + ## Connection objects (extension of httplib) class VerifiedHTTPSConnection(HTTPSConnection): @@ -107,6 +101,7 @@ class VerifiedHTTPSConnection(HTTPSConnection): if self.ca_certs: match_hostname(self.sock.getpeercert(), self.host) + ## Pool objects class ConnectionPool(object): @@ -212,7 +207,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): conn = self.pool.get(block=self.block, timeout=timeout) # If this is a persistent connection, check if it got disconnected - if conn and conn.sock and is_connection_dropped(conn): + if conn and is_connection_dropped(conn): log.info("Resetting dropped connection: %s" % self.host) conn.close() @@ -256,9 +251,13 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): timeout = self.timeout conn.timeout = timeout # This only does anything in Py26+ - conn.request(method, url, **httplib_request_kw) - conn.sock.settimeout(timeout) + + # Set timeout + sock = getattr(conn, 'sock', False) # AppEngine doesn't have sock attr. + if sock: + sock.settimeout(timeout) + httplib_response = conn.getresponse() log.debug("\"%s %s %s\" %s %s" % @@ -295,7 +294,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): .. note:: More commonly, it's appropriate to use a convenience method provided - by :class:`.RequestMethods`, such as :meth:`.request`. + by :class:`.RequestMethods`, such as :meth:`request`. .. note:: @@ -495,94 +494,6 @@ class HTTPSConnectionPool(HTTPConnectionPool): return connection -## Helpers - -def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, - basic_auth=None): - """ - Shortcuts for generating request headers. - - :param keep_alive: - If ``True``, adds 'connection: keep-alive' header. - - :param accept_encoding: - Can be a boolean, list, or string. - ``True`` translates to 'gzip,deflate'. - List will get joined by comma. - String will be used as provided. - - :param user_agent: - String representing the user-agent you want, such as - "python-urllib3/0.6" - - :param basic_auth: - Colon-separated username:password string for 'authorization: basic ...' - auth header. 
- - Example: :: - - >>> make_headers(keep_alive=True, user_agent="Batman/1.0") - {'connection': 'keep-alive', 'user-agent': 'Batman/1.0'} - >>> make_headers(accept_encoding=True) - {'accept-encoding': 'gzip,deflate'} - """ - headers = {} - if accept_encoding: - if isinstance(accept_encoding, str): - pass - elif isinstance(accept_encoding, list): - accept_encoding = ','.join(accept_encoding) - else: - accept_encoding = 'gzip,deflate' - headers['accept-encoding'] = accept_encoding - - if user_agent: - headers['user-agent'] = user_agent - - if keep_alive: - headers['connection'] = 'keep-alive' - - if basic_auth: - headers['authorization'] = 'Basic ' + \ - b64encode(six.b(basic_auth)).decode('utf-8') - - return headers - - -def get_host(url): - """ - Given a url, return its scheme, host and port (None if it's not there). - - For example: :: - - >>> get_host('http://google.com/mail/') - ('http', 'google.com', None) - >>> get_host('google.com:80') - ('http', 'google.com', 80) - """ - - # This code is actually similar to urlparse.urlsplit, but much - # simplified for our needs. - port = None - scheme = 'http' - - if '://' in url: - scheme, url = url.split('://', 1) - if '/' in url: - url, _path = url.split('/', 1) - if '@' in url: - _auth, url = url.split('@', 1) - if ':' in url: - url, port = url.split(':', 1) - - if not port.isdigit(): - raise LocationParseError("Failed to parse: %s") - - port = int(port) - - return scheme, url, port - - def connection_from_url(url, **kw): """ Given a url, return an :class:`.ConnectionPool` instance of its host. @@ -608,22 +519,3 @@ def connection_from_url(url, **kw): return HTTPSConnectionPool(host, port=port, **kw) else: return HTTPConnectionPool(host, port=port, **kw) - - -def is_connection_dropped(conn): - """ - Returns True if the connection is dropped and should be closed. - - :param conn: - ``HTTPConnection`` object. - """ - if not poll: # Platform-specific - return select([conn.sock], [], [], 0.0)[0] - - # This version is better on platforms that support it. - p = poll() - p.register(conn.sock, POLLIN) - for (fno, ev) in p.poll(0.0): - if fno == conn.sock.fileno(): - # Either data is buffered (bad), or the connection is dropped. - return True diff --git a/urllib3/filepost.py b/urllib3/filepost.py index e1ec8af..344a103 100644 --- a/urllib3/filepost.py +++ b/urllib3/filepost.py @@ -24,15 +24,29 @@ def get_content_type(filename): return mimetypes.guess_type(filename)[0] or 'application/octet-stream' +def iter_fields(fields): + """ + Iterate over fields. + + Supports list of (k, v) tuples and dicts. + """ + if isinstance(fields, dict): + return ((k, v) for k, v in six.iteritems(fields)) + + return ((k, v) for k, v in fields) + + def encode_multipart_formdata(fields, boundary=None): """ Encode a dictionary of ``fields`` using the multipart/form-data mime format. :param fields: - Dictionary of fields. The key is treated as the field name, and the - value as the body of the form-data. If the value is a tuple of two - elements, then the first element is treated as the filename of the - form-data section. + Dictionary of fields or list of (key, value) field tuples. The key is + treated as the field name, and the value as the body of the form-data + bytes. If the value is a tuple of two elements, then the first element + is treated as the filename of the form-data section. + + Field names and filenames must be unicode. 
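+
+        An editorial sketch, not upstream documentation: the list-of-tuples
+        form is what allows a repeated field name, per the 1.3 changelog::
+
+            body, content_type = encode_multipart_formdata(
+                [('tag', 'python'), ('tag', 'http')])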
:param boundary: If not specified, then a random boundary will be generated using @@ -42,7 +56,7 @@ def encode_multipart_formdata(fields, boundary=None): if boundary is None: boundary = choose_boundary() - for fieldname, value in six.iteritems(fields): + for fieldname, value in iter_fields(fields): body.write(b('--%s\r\n' % (boundary))) if isinstance(value, tuple): diff --git a/urllib3/poolmanager.py b/urllib3/poolmanager.py index d42f35b..310ea21 100644 --- a/urllib3/poolmanager.py +++ b/urllib3/poolmanager.py @@ -39,11 +39,11 @@ class PoolManager(RequestMethods): Example: :: - >>> manager = PoolManager() + >>> manager = PoolManager(num_pools=2) >>> r = manager.urlopen("http://google.com/") >>> r = manager.urlopen("http://google.com/mail") >>> r = manager.urlopen("http://yahoo.com/") - >>> len(r.pools) + >>> len(manager.pools) 2 """ diff --git a/urllib3/request.py b/urllib3/request.py index 5ea26a0..569ac96 100644 --- a/urllib3/request.py +++ b/urllib3/request.py @@ -44,7 +44,7 @@ class RequestMethods(object): def urlopen(self, method, url, body=None, headers=None, encode_multipart=True, multipart_boundary=None, - **kw): + **kw): # Abstract raise NotImplemented("Classes extending RequestMethods must implement " "their own ``urlopen`` method.") @@ -126,22 +126,3 @@ class RequestMethods(object): return self.urlopen(method, url, body=body, headers=headers, **urlopen_kw) - - # Deprecated: - - def get_url(self, url, fields=None, **urlopen_kw): - """ - .. deprecated:: 1.0 - Use :meth:`request` instead. - """ - return self.request_encode_url('GET', url, fields=fields, - **urlopen_kw) - - def post_url(self, url, fields=None, headers=None, **urlopen_kw): - """ - .. deprecated:: 1.0 - Use :meth:`request` instead. - """ - return self.request_encode_body('POST', url, fields=fields, - headers=headers, - **urlopen_kw) diff --git a/urllib3/response.py b/urllib3/response.py index 4dd431e..5fab824 100644 --- a/urllib3/response.py +++ b/urllib3/response.py @@ -171,11 +171,22 @@ class HTTPResponse(object): with ``original_response=r``. """ + # Normalize headers between different versions of Python + headers = {} + for k, v in r.getheaders(): + # Python 3: Header keys are returned capitalised + k = k.lower() + + has_value = headers.get(k) + if has_value: # Python 3: Repeating header keys are unmerged. + v = ', '.join([has_value, v]) + + headers[k] = v + # HTTPResponse objects in Python 3 don't have a .strict attribute strict = getattr(r, 'strict', 0) return ResponseCls(body=r, - # In Python 3, the header keys are returned capitalised - headers=dict((k.lower(), v) for k,v in r.getheaders()), + headers=headers, status=r.status, version=r.version, reason=r.reason, diff --git a/urllib3/util.py b/urllib3/util.py new file mode 100644 index 0000000..2684a2f --- /dev/null +++ b/urllib3/util.py @@ -0,0 +1,136 @@ +# urllib3/util.py +# Copyright 2008-2012 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# +# This module is part of urllib3 and is released under +# the MIT License: http://www.opensource.org/licenses/mit-license.php + + +from base64 import b64encode + +try: + from select import poll, POLLIN +except ImportError: # `poll` doesn't exist on OSX and other platforms + poll = False + try: + from select import select + except ImportError: # `select` doesn't exist on AppEngine. 
+ select = False + +from .packages import six +from .exceptions import LocationParseError + + +def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, + basic_auth=None): + """ + Shortcuts for generating request headers. + + :param keep_alive: + If ``True``, adds 'connection: keep-alive' header. + + :param accept_encoding: + Can be a boolean, list, or string. + ``True`` translates to 'gzip,deflate'. + List will get joined by comma. + String will be used as provided. + + :param user_agent: + String representing the user-agent you want, such as + "python-urllib3/0.6" + + :param basic_auth: + Colon-separated username:password string for 'authorization: basic ...' + auth header. + + Example: :: + + >>> make_headers(keep_alive=True, user_agent="Batman/1.0") + {'connection': 'keep-alive', 'user-agent': 'Batman/1.0'} + >>> make_headers(accept_encoding=True) + {'accept-encoding': 'gzip,deflate'} + """ + headers = {} + if accept_encoding: + if isinstance(accept_encoding, str): + pass + elif isinstance(accept_encoding, list): + accept_encoding = ','.join(accept_encoding) + else: + accept_encoding = 'gzip,deflate' + headers['accept-encoding'] = accept_encoding + + if user_agent: + headers['user-agent'] = user_agent + + if keep_alive: + headers['connection'] = 'keep-alive' + + if basic_auth: + headers['authorization'] = 'Basic ' + \ + b64encode(six.b(basic_auth)).decode('utf-8') + + return headers + + +def get_host(url): + """ + Given a url, return its scheme, host and port (None if it's not there). + + For example: :: + + >>> get_host('http://google.com/mail/') + ('http', 'google.com', None) + >>> get_host('google.com:80') + ('http', 'google.com', 80) + """ + + # This code is actually similar to urlparse.urlsplit, but much + # simplified for our needs. + port = None + scheme = 'http' + + if '://' in url: + scheme, url = url.split('://', 1) + if '/' in url: + url, _path = url.split('/', 1) + if '@' in url: + _auth, url = url.split('@', 1) + if ':' in url: + url, port = url.split(':', 1) + + if not port.isdigit(): + raise LocationParseError("Failed to parse: %s" % url) + + port = int(port) + + return scheme, url, port + + + +def is_connection_dropped(conn): + """ + Returns True if the connection is dropped and should be closed. + + :param conn: + ``HTTPConnection`` object. + + Note: For platforms like AppEngine, this will always return ``False`` to + let the platform handle connection recycling transparently for us. + """ + sock = getattr(conn, 'sock', False) + if not sock: #Platform-specific: AppEngine + return False + + if not poll: # Platform-specific + if not select: #Platform-specific: AppEngine + return False + + return select([sock], [], [], 0.0)[0] + + # This version is better on platforms that support it. + p = poll() + p.register(sock, POLLIN) + for (fno, ev) in p.poll(0.0): + if fno == sock.fileno(): + # Either data is buffered (bad), or the connection is dropped. 
+ return True -- cgit v1.2.3 From e5b66555b54a9854b340975471e8cdfa64e311f7 Mon Sep 17 00:00:00 2001 From: SVN-Git Migration Date: Thu, 8 Oct 2015 13:19:32 -0700 Subject: Imported Upstream version 1.5 --- CHANGES.rst | 37 +++ CONTRIBUTORS.txt | 6 + PKG-INFO | 43 +++- README.rst | 2 +- dummyserver/handlers.py | 10 +- setup.py | 2 +- test-requirements.txt | 2 +- test/__init__.py | 0 test/benchmark.py | 77 ------ test/test_collections.py | 55 +++-- test/test_connectionpool.py | 140 ++++++----- test/test_poolmanager.py | 24 ++ test/test_response.py | 4 +- test/test_util.py | 150 ++++++++++++ urllib3.egg-info/PKG-INFO | 43 +++- urllib3.egg-info/SOURCES.txt | 7 +- urllib3/__init__.py | 22 +- urllib3/_collections.py | 169 ++++++-------- urllib3/connectionpool.py | 99 +++++--- urllib3/contrib/__init__.py | 0 urllib3/contrib/ntlmpool.py | 120 ---------- urllib3/exceptions.py | 10 + urllib3/filepost.py | 13 +- .../packages/mimetools_choose_boundary/__init__.py | 47 ---- urllib3/packages/ordered_dict.py | 260 +++++++++++++++++++++ urllib3/poolmanager.py | 64 +++-- urllib3/response.py | 8 +- urllib3/util.py | 194 +++++++++++---- 28 files changed, 1061 insertions(+), 547 deletions(-) delete mode 100644 test/__init__.py delete mode 100644 test/benchmark.py create mode 100644 test/test_util.py delete mode 100644 urllib3/contrib/__init__.py delete mode 100644 urllib3/contrib/ntlmpool.py delete mode 100644 urllib3/packages/mimetools_choose_boundary/__init__.py create mode 100644 urllib3/packages/ordered_dict.py diff --git a/CHANGES.rst b/CHANGES.rst index 5afdd62..a0cbdb3 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,43 @@ Changes ======= +1.5 (2012-08-02) +++++++++++++++++ + +* Added ``urllib3.add_stderr_logger()`` for quickly enabling STDERR debug + logging in urllib3. + +* Native full URL parsing (including auth, path, query, fragment) available in + ``urllib3.util.parse_url(url)``. + +* Built-in redirect will switch method to 'GET' if status code is 303. + (Issue #11) + +* ``urllib3.PoolManager`` strips the scheme and host before sending the request + uri. (Issue #8) + +* New ``urllib3.exceptions.DecodeError`` exception for when automatic decoding, + based on the Content-Type header, fails. + +* Fixed bug with pool depletion and leaking connections (Issue #76). Added + explicit connection closing on pool eviction. Added + ``urllib3.PoolManager.clear()``. + +* 99% -> 100% unit test coverage. + + +1.4 (2012-06-16) +++++++++++++++++ + +* Minor AppEngine-related fixes. + +* Switched from ``mimetools.choose_boundary`` to ``uuid.uuid4()``. + +* Improved url parsing. (Issue #73) + +* IPv6 url support. (Issue #72) + + 1.3 (2012-03-25) ++++++++++++++++ diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt index 37140ca..7dfbcaf 100644 --- a/CONTRIBUTORS.txt +++ b/CONTRIBUTORS.txt @@ -39,5 +39,11 @@ In chronological order: * brandon-rhodes * Design review, bugfixes, test coverage. +* studer + * IPv6 url support and test coverage + +* Shivaram Lingamneni + * Support for explicitly closing pooled connections + * [Your name or handle] <[email or website]> * [Brief summary of your changes] diff --git a/PKG-INFO b/PKG-INFO index 17d8a02..4e79ea8 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,6 +1,6 @@ -Metadata-Version: 1.0 +Metadata-Version: 1.1 Name: urllib3 -Version: 1.3 +Version: 1.5 Summary: HTTP library with thread-safe connection pooling, file post, and more. Home-page: http://urllib3.readthedocs.org/ Author: Andrey Petrov @@ -17,7 +17,7 @@ Description: Highlights - Supports gzip and deflate decoding. 
- Thread-safe and sanity-safe. - Works with AppEngine, gevent, and eventlib. - - Tested on Python 2.6+ and Python 3.2+, 99% unit test coverage. + - Tested on Python 2.6+ and Python 3.2+, 100% unit test coverage. - Small and easy to understand codebase perfect for extending and building upon. For a more comprehensive solution, have a look at `Requests `_ which is also powered by urllib3. @@ -109,6 +109,43 @@ Description: Highlights Changes ======= + 1.5 (2012-08-02) + ++++++++++++++++ + + * Added ``urllib3.add_stderr_logger()`` for quickly enabling STDERR debug + logging in urllib3. + + * Native full URL parsing (including auth, path, query, fragment) available in + ``urllib3.util.parse_url(url)``. + + * Built-in redirect will switch method to 'GET' if status code is 303. + (Issue #11) + + * ``urllib3.PoolManager`` strips the scheme and host before sending the request + uri. (Issue #8) + + * New ``urllib3.exceptions.DecodeError`` exception for when automatic decoding, + based on the Content-Type header, fails. + + * Fixed bug with pool depletion and leaking connections (Issue #76). Added + explicit connection closing on pool eviction. Added + ``urllib3.PoolManager.clear()``. + + * 99% -> 100% unit test coverage. + + + 1.4 (2012-06-16) + ++++++++++++++++ + + * Minor AppEngine-related fixes. + + * Switched from ``mimetools.choose_boundary`` to ``uuid.uuid4()``. + + * Improved url parsing. (Issue #73) + + * IPv6 url support. (Issue #72) + + 1.3 (2012-03-25) ++++++++++++++++ diff --git a/README.rst b/README.rst index f177852..144df0e 100644 --- a/README.rst +++ b/README.rst @@ -9,7 +9,7 @@ Highlights - Supports gzip and deflate decoding. - Thread-safe and sanity-safe. - Works with AppEngine, gevent, and eventlib. -- Tested on Python 2.6+ and Python 3.2+, 99% unit test coverage. +- Tested on Python 2.6+ and Python 3.2+, 100% unit test coverage. - Small and easy to understand codebase perfect for extending and building upon. For a more comprehensive solution, have a look at `Requests `_ which is also powered by urllib3. diff --git a/dummyserver/handlers.py b/dummyserver/handlers.py index 3e32881..ca809ad 100644 --- a/dummyserver/handlers.py +++ b/dummyserver/handlers.py @@ -145,14 +145,20 @@ class TestingApp(WSGIHandler): data = b"hello, world!" 
encoding = request.headers.get('Accept-Encoding', '') headers = None - if 'gzip' in encoding: + if encoding == 'gzip': headers = [('Content-Encoding', 'gzip')] file_ = BytesIO() gzip.GzipFile('', mode='w', fileobj=file_).write(data) data = file_.getvalue() - elif 'deflate' in encoding: + elif encoding == 'deflate': headers = [('Content-Encoding', 'deflate')] data = zlib.compress(data) + elif encoding == 'garbage-gzip': + headers = [('Content-Encoding', 'gzip')] + data = 'garbage' + elif encoding == 'garbage-deflate': + headers = [('Content-Encoding', 'deflate')] + data = 'garbage' return Response(data, headers=headers) def shutdown(self, request): diff --git a/setup.py b/setup.py index f093f34..84d6e7f 100644 --- a/setup.py +++ b/setup.py @@ -45,7 +45,7 @@ setup(name='urllib3', url='http://urllib3.readthedocs.org/', license='MIT', packages=['urllib3', 'dummyserver', 'urllib3.packages', - 'urllib3.packages.ssl_match_hostname', 'urllib3.packages.mimetools_choose_boundary', + 'urllib3.packages.ssl_match_hostname', ], requires=requirements, tests_require=tests_requirements, diff --git a/test-requirements.txt b/test-requirements.txt index 568b0d4..e2d1579 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -1,2 +1,2 @@ nose -tornado +tornado==2.1.1 diff --git a/test/__init__.py b/test/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/test/benchmark.py b/test/benchmark.py deleted file mode 100644 index e7049c4..0000000 --- a/test/benchmark.py +++ /dev/null @@ -1,77 +0,0 @@ -#!/usr/bin/env python - -""" -Really simple rudimentary benchmark to compare ConnectionPool versus standard -urllib to demonstrate the usefulness of connection re-using. -""" -from __future__ import print_function - -import sys -import time -import urllib - -sys.path.append('../') -import urllib3 - - -# URLs to download. Doesn't matter as long as they're from the same host, so we -# can take advantage of connection re-using. 
-TO_DOWNLOAD = [ - 'http://code.google.com/apis/apps/', - 'http://code.google.com/apis/base/', - 'http://code.google.com/apis/blogger/', - 'http://code.google.com/apis/calendar/', - 'http://code.google.com/apis/codesearch/', - 'http://code.google.com/apis/contact/', - 'http://code.google.com/apis/books/', - 'http://code.google.com/apis/documents/', - 'http://code.google.com/apis/finance/', - 'http://code.google.com/apis/health/', - 'http://code.google.com/apis/notebook/', - 'http://code.google.com/apis/picasaweb/', - 'http://code.google.com/apis/spreadsheets/', - 'http://code.google.com/apis/webmastertools/', - 'http://code.google.com/apis/youtube/', -] - - -def urllib_get(url_list): - assert url_list - for url in url_list: - now = time.time() - r = urllib.urlopen(url) - elapsed = time.time() - now - print("Got in %0.3f: %s" % (elapsed, url)) - - -def pool_get(url_list): - assert url_list - pool = urllib3.connection_from_url(url_list[0]) - for url in url_list: - now = time.time() - r = pool.get_url(url) - elapsed = time.time() - now - print("Got in %0.3fs: %s" % (elapsed, url)) - - -if __name__ == '__main__': - print("Running pool_get ...") - now = time.time() - pool_get(TO_DOWNLOAD) - pool_elapsed = time.time() - now - - print("Running urllib_get ...") - now = time.time() - urllib_get(TO_DOWNLOAD) - urllib_elapsed = time.time() - now - - print("Completed pool_get in %0.3fs" % pool_elapsed) - print("Completed urllib_get in %0.3fs" % urllib_elapsed) - - -""" -Example results: - -Completed pool_get in 1.163s -Completed urllib_get in 2.318s -""" diff --git a/test/test_collections.py b/test/test_collections.py index 6cb5aca..098b31a 100644 --- a/test/test_collections.py +++ b/test/test_collections.py @@ -36,19 +36,7 @@ class TestLRUContainer(unittest.TestCase): d[5] = '5' # Check state - self.assertEqual(list(d.keys()), [0, 2, 3, 4, 5]) - - def test_pruning(self): - d = Container(5) - - for i in xrange(5): - d[i] = str(i) - - # Contend 2 entries for the most-used slot to balloon the heap - for i in xrange(100): - d.get(i % 2) - - self.assertTrue(len(d.access_log) <= d.CLEANUP_FACTOR * d._maxsize) + self.assertEqual(list(d.keys()), [2, 3, 4, 0, 5]) def test_same_key(self): d = Container(5) @@ -57,10 +45,7 @@ class TestLRUContainer(unittest.TestCase): d['foo'] = i self.assertEqual(list(d.keys()), ['foo']) - - d._prune_invalidated_entries() - - self.assertEqual(len(d.access_log), 1) + self.assertEqual(len(d), 1) def test_access_ordering(self): d = Container(5) @@ -68,13 +53,14 @@ class TestLRUContainer(unittest.TestCase): for i in xrange(10): d[i] = True - self.assertEqual(d._get_ordered_access_keys(), [9,8,7,6,5]) + # Keys should be ordered by access time + self.assertEqual(list(d.keys()), [5, 6, 7, 8, 9]) new_order = [7,8,6,9,5] - for k in reversed(new_order): + for k in new_order: d[k] - self.assertEqual(d._get_ordered_access_keys(), new_order) + self.assertEqual(list(d.keys()), new_order) def test_delete(self): d = Container(5) @@ -107,6 +93,35 @@ class TestLRUContainer(unittest.TestCase): self.assertRaises(KeyError, lambda: d[5]) + def test_disposal(self): + evicted_items = [] + + def dispose_func(arg): + # Save the evicted datum for inspection + evicted_items.append(arg) + + d = Container(5, dispose_func=dispose_func) + for i in xrange(5): + d[i] = i + self.assertEqual(list(d.keys()), list(xrange(5))) + self.assertEqual(evicted_items, []) # Nothing disposed + + d[5] = 5 + self.assertEqual(list(d.keys()), list(xrange(1, 6))) + self.assertEqual(evicted_items, [0]) + + del d[1] + 
self.assertEqual(evicted_items, [0, 1]) + + d.clear() + self.assertEqual(evicted_items, [0, 1, 2, 3, 4, 5]) + + def test_iter(self): + d = Container() + + with self.assertRaises(NotImplementedError): + for i in d: + self.fail("Iteration shouldn't be implemented.") if __name__ == '__main__': unittest.main() diff --git a/test/test_connectionpool.py b/test/test_connectionpool.py index c32c6dc..afc3098 100644 --- a/test/test_connectionpool.py +++ b/test/test_connectionpool.py @@ -1,30 +1,28 @@ import unittest from urllib3.connectionpool import connection_from_url, HTTPConnectionPool -from urllib3.util import get_host, make_headers -from urllib3.exceptions import EmptyPoolError, LocationParseError +from urllib3.packages.ssl_match_hostname import CertificateError +from urllib3.exceptions import ( + ClosedPoolError, + EmptyPoolError, + HostChangedError, + MaxRetryError, + SSLError, + TimeoutError, +) + +from socket import timeout as SocketTimeout +from ssl import SSLError as BaseSSLError + +try: # Python 3 + from queue import Empty + from http.client import HTTPException +except ImportError: + from Queue import Empty + from httplib import HTTPException class TestConnectionPool(unittest.TestCase): - def test_get_host(self): - url_host_map = { - 'http://google.com/mail': ('http', 'google.com', None), - 'http://google.com/mail/': ('http', 'google.com', None), - 'google.com/mail': ('http', 'google.com', None), - 'http://google.com/': ('http', 'google.com', None), - 'http://google.com': ('http', 'google.com', None), - 'http://www.google.com': ('http', 'www.google.com', None), - 'http://mail.google.com': ('http', 'mail.google.com', None), - 'http://google.com:8000/mail/': ('http', 'google.com', 8000), - 'http://google.com:8000': ('http', 'google.com', 8000), - 'https://google.com': ('https', 'google.com', None), - 'https://google.com:8000': ('https', 'google.com', 8000), - 'http://user:password@127.0.0.1:1234': ('http', '127.0.0.1', 1234), - } - for url, expected_host in url_host_map.items(): - returned_host = get_host(url) - self.assertEquals(returned_host, expected_host) - def test_same_host(self): same_host = [ ('http://google.com/', '/'), @@ -50,45 +48,6 @@ class TestConnectionPool(unittest.TestCase): c = connection_from_url(a) self.assertFalse(c.is_same_host(b), "%s =? 
%s" % (a, b)) - def test_invalid_host(self): - # TODO: Add more tests - invalid_host = [ - 'http://google.com:foo', - ] - - for location in invalid_host: - self.assertRaises(LocationParseError, get_host, location) - - - def test_make_headers(self): - self.assertEqual( - make_headers(accept_encoding=True), - {'accept-encoding': 'gzip,deflate'}) - - self.assertEqual( - make_headers(accept_encoding='foo,bar'), - {'accept-encoding': 'foo,bar'}) - - self.assertEqual( - make_headers(accept_encoding=['foo', 'bar']), - {'accept-encoding': 'foo,bar'}) - - self.assertEqual( - make_headers(accept_encoding=True, user_agent='banana'), - {'accept-encoding': 'gzip,deflate', 'user-agent': 'banana'}) - - self.assertEqual( - make_headers(user_agent='banana'), - {'user-agent': 'banana'}) - - self.assertEqual( - make_headers(keep_alive=True), - {'connection': 'keep-alive'}) - - self.assertEqual( - make_headers(basic_auth='foo:bar'), - {'authorization': 'Basic Zm9vOmJhcg=='}) - def test_max_connections(self): pool = HTTPConnectionPool(host='localhost', maxsize=1, block=True) @@ -127,6 +86,67 @@ class TestConnectionPool(unittest.TestCase): str(EmptyPoolError(HTTPConnectionPool(host='localhost'), "Test.")), "HTTPConnectionPool(host='localhost', port=None): Test.") + def test_pool_size(self): + POOL_SIZE = 1 + pool = HTTPConnectionPool(host='localhost', maxsize=POOL_SIZE, block=True) + + def _raise(ex): + raise ex() + + def _test(exception, expect): + pool._make_request = lambda *args, **kwargs: _raise(exception) + with self.assertRaises(expect): + pool.request('GET', '/') + + self.assertEqual(pool.pool.qsize(), POOL_SIZE) + + #make sure that all of the exceptions return the connection to the pool + _test(Empty, TimeoutError) + _test(SocketTimeout, TimeoutError) + _test(BaseSSLError, SSLError) + _test(CertificateError, SSLError) + + # The pool should never be empty, and with these two exceptions being raised, + # a retry will be triggered, but that retry will fail, eventually raising + # MaxRetryError, not EmptyPoolError + # See: https://github.com/shazow/urllib3/issues/76 + pool._make_request = lambda *args, **kwargs: _raise(HTTPException) + with self.assertRaises(MaxRetryError): + pool.request('GET', '/', retries=1, pool_timeout=0.01) + self.assertEqual(pool.pool.qsize(), POOL_SIZE) + + def test_assert_same_host(self): + c = connection_from_url('http://google.com:80') + + with self.assertRaises(HostChangedError): + c.request('GET', 'http://yahoo.com:80', assert_same_host=True) + + def test_pool_close(self): + pool = connection_from_url('http://google.com:80') + + # Populate with some connections + conn1 = pool._get_conn() + conn2 = pool._get_conn() + conn3 = pool._get_conn() + pool._put_conn(conn1) + pool._put_conn(conn2) + + old_pool_queue = pool.pool + + pool.close() + self.assertEqual(pool.pool, None) + + with self.assertRaises(ClosedPoolError): + pool._get_conn() + + pool._put_conn(conn3) + + with self.assertRaises(ClosedPoolError): + pool._get_conn() + + with self.assertRaises(Empty): + old_pool_queue.get(block=False) + if __name__ == '__main__': unittest.main() diff --git a/test/test_poolmanager.py b/test/test_poolmanager.py index 12722f7..273abf9 100644 --- a/test/test_poolmanager.py +++ b/test/test_poolmanager.py @@ -2,6 +2,7 @@ import unittest from urllib3.poolmanager import PoolManager from urllib3 import connection_from_url +from urllib3.exceptions import ClosedPoolError class TestPoolManager(unittest.TestCase): @@ -42,6 +43,29 @@ class TestPoolManager(unittest.TestCase): 
self.assertEqual(len(connections), 5) + def test_manager_clear(self): + p = PoolManager(5) + + conn_pool = p.connection_from_url('http://google.com') + self.assertEqual(len(p.pools), 1) + + conn = conn_pool._get_conn() + + p.clear() + self.assertEqual(len(p.pools), 0) + + with self.assertRaises(ClosedPoolError): + conn_pool._get_conn() + + conn_pool._put_conn(conn) + + with self.assertRaises(ClosedPoolError): + conn_pool._get_conn() + + self.assertEqual(len(p.pools), 0) + + + if __name__ == '__main__': unittest.main() diff --git a/test/test_response.py b/test/test_response.py index 0ef379c..964f677 100644 --- a/test/test_response.py +++ b/test/test_response.py @@ -1,9 +1,9 @@ import unittest -import zlib from io import BytesIO from urllib3.response import HTTPResponse +from urllib3.exceptions import DecodeError class TestLegacyResponse(unittest.TestCase): def test_getheaders(self): @@ -50,7 +50,7 @@ class TestResponse(unittest.TestCase): def test_decode_bad_data(self): fp = BytesIO(b'\x00' * 10) - self.assertRaises(zlib.error, HTTPResponse, fp, headers={ + self.assertRaises(DecodeError, HTTPResponse, fp, headers={ 'content-encoding': 'deflate' }) diff --git a/test/test_util.py b/test/test_util.py new file mode 100644 index 0000000..a989da6 --- /dev/null +++ b/test/test_util.py @@ -0,0 +1,150 @@ +import unittest +import logging + +from urllib3 import add_stderr_logger +from urllib3.util import get_host, make_headers, split_first, parse_url, Url +from urllib3.exceptions import LocationParseError + + +class TestUtil(unittest.TestCase): + def test_get_host(self): + url_host_map = { + # Hosts + 'http://google.com/mail': ('http', 'google.com', None), + 'http://google.com/mail/': ('http', 'google.com', None), + 'google.com/mail': ('http', 'google.com', None), + 'http://google.com/': ('http', 'google.com', None), + 'http://google.com': ('http', 'google.com', None), + 'http://www.google.com': ('http', 'www.google.com', None), + 'http://mail.google.com': ('http', 'mail.google.com', None), + 'http://google.com:8000/mail/': ('http', 'google.com', 8000), + 'http://google.com:8000': ('http', 'google.com', 8000), + 'https://google.com': ('https', 'google.com', None), + 'https://google.com:8000': ('https', 'google.com', 8000), + 'http://user:password@127.0.0.1:1234': ('http', '127.0.0.1', 1234), + 'http://google.com/foo=http://bar:42/baz': ('http', 'google.com', None), + 'http://google.com?foo=http://bar:42/baz': ('http', 'google.com', None), + 'http://google.com#foo=http://bar:42/baz': ('http', 'google.com', None), + + # IPv4 + '173.194.35.7': ('http', '173.194.35.7', None), + 'http://173.194.35.7': ('http', '173.194.35.7', None), + 'http://173.194.35.7/test': ('http', '173.194.35.7', None), + 'http://173.194.35.7:80': ('http', '173.194.35.7', 80), + 'http://173.194.35.7:80/test': ('http', '173.194.35.7', 80), + + # IPv6 + '[2a00:1450:4001:c01::67]': ('http', '2a00:1450:4001:c01::67', None), + 'http://[2a00:1450:4001:c01::67]': ('http', '2a00:1450:4001:c01::67', None), + 'http://[2a00:1450:4001:c01::67]/test': ('http', '2a00:1450:4001:c01::67', None), + 'http://[2a00:1450:4001:c01::67]:80': ('http', '2a00:1450:4001:c01::67', 80), + 'http://[2a00:1450:4001:c01::67]:80/test': ('http', '2a00:1450:4001:c01::67', 80), + + # More IPv6 from http://www.ietf.org/rfc/rfc2732.txt + 'http://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:8000/index.html': ('http', 'FEDC:BA98:7654:3210:FEDC:BA98:7654:3210', 8000), + 'http://[1080:0:0:0:8:800:200C:417A]/index.html': ('http', '1080:0:0:0:8:800:200C:417A', None), + 
'http://[3ffe:2a00:100:7031::1]': ('http', '3ffe:2a00:100:7031::1', None), + 'http://[1080::8:800:200C:417A]/foo': ('http', '1080::8:800:200C:417A', None), + 'http://[::192.9.5.5]/ipng': ('http', '::192.9.5.5', None), + 'http://[::FFFF:129.144.52.38]:42/index.html': ('http', '::FFFF:129.144.52.38', 42), + 'http://[2010:836B:4179::836B:4179]': ('http', '2010:836B:4179::836B:4179', None), + } + for url, expected_host in url_host_map.items(): + returned_host = get_host(url) + self.assertEquals(returned_host, expected_host) + + def test_invalid_host(self): + # TODO: Add more tests + invalid_host = [ + 'http://google.com:foo', + ] + + for location in invalid_host: + self.assertRaises(LocationParseError, get_host, location) + + def test_parse_url(self): + url_host_map = { + 'http://google.com/mail': Url('http', host='google.com', path='/mail'), + 'http://google.com/mail/': Url('http', host='google.com', path='/mail/'), + 'google.com/mail': Url(host='google.com', path='/mail'), + 'http://google.com/': Url('http', host='google.com', path='/'), + 'http://google.com': Url('http', host='google.com'), + 'http://google.com?foo': Url('http', host='google.com', path='', query='foo'), + '': Url(), + '/': Url(path='/'), + '?': Url(path='', query=''), + '#': Url(path='', fragment=''), + '#?/!google.com/?foo#bar': Url(path='', fragment='?/!google.com/?foo#bar'), + '/foo': Url(path='/foo'), + '/foo?bar=baz': Url(path='/foo', query='bar=baz'), + '/foo?bar=baz#banana?apple/orange': Url(path='/foo', query='bar=baz', fragment='banana?apple/orange'), + } + for url, expected_url in url_host_map.items(): + returned_url = parse_url(url) + self.assertEquals(returned_url, expected_url) + + def test_request_uri(self): + url_host_map = { + 'http://google.com/mail': '/mail', + 'http://google.com/mail/': '/mail/', + 'http://google.com/': '/', + 'http://google.com': '/', + '': '/', + '/': '/', + '?': '/?', + '#': '/', + '/foo?bar=baz': '/foo?bar=baz', + } + for url, expected_request_uri in url_host_map.items(): + returned_url = parse_url(url) + self.assertEquals(returned_url.request_uri, expected_request_uri) + + def test_make_headers(self): + self.assertEqual( + make_headers(accept_encoding=True), + {'accept-encoding': 'gzip,deflate'}) + + self.assertEqual( + make_headers(accept_encoding='foo,bar'), + {'accept-encoding': 'foo,bar'}) + + self.assertEqual( + make_headers(accept_encoding=['foo', 'bar']), + {'accept-encoding': 'foo,bar'}) + + self.assertEqual( + make_headers(accept_encoding=True, user_agent='banana'), + {'accept-encoding': 'gzip,deflate', 'user-agent': 'banana'}) + + self.assertEqual( + make_headers(user_agent='banana'), + {'user-agent': 'banana'}) + + self.assertEqual( + make_headers(keep_alive=True), + {'connection': 'keep-alive'}) + + self.assertEqual( + make_headers(basic_auth='foo:bar'), + {'authorization': 'Basic Zm9vOmJhcg=='}) + + + def test_split_first(self): + test_cases = { + ('abcd', 'b'): ('a', 'cd', 'b'), + ('abcd', 'cb'): ('a', 'cd', 'b'), + ('abcd', ''): ('abcd', '', None), + ('abcd', 'a'): ('', 'bcd', 'a'), + ('abcd', 'ab'): ('', 'bcd', 'a'), + } + for input, expected in test_cases.items(): + output = split_first(*input) + self.assertEqual(output, expected) + + def test_add_stderr_logger(self): + handler = add_stderr_logger(level=logging.INFO) # Don't actually print debug + logger = logging.getLogger('urllib3') + self.assertTrue(handler in logger.handlers) + + logger.debug('Testing add_stderr_logger') + logger.removeHandler(handler) diff --git a/urllib3.egg-info/PKG-INFO 
b/urllib3.egg-info/PKG-INFO index 17d8a02..4e79ea8 100644 --- a/urllib3.egg-info/PKG-INFO +++ b/urllib3.egg-info/PKG-INFO @@ -1,6 +1,6 @@ -Metadata-Version: 1.0 +Metadata-Version: 1.1 Name: urllib3 -Version: 1.3 +Version: 1.5 Summary: HTTP library with thread-safe connection pooling, file post, and more. Home-page: http://urllib3.readthedocs.org/ Author: Andrey Petrov @@ -17,7 +17,7 @@ Description: Highlights - Supports gzip and deflate decoding. - Thread-safe and sanity-safe. - Works with AppEngine, gevent, and eventlib. - - Tested on Python 2.6+ and Python 3.2+, 99% unit test coverage. + - Tested on Python 2.6+ and Python 3.2+, 100% unit test coverage. - Small and easy to understand codebase perfect for extending and building upon. For a more comprehensive solution, have a look at `Requests `_ which is also powered by urllib3. @@ -109,6 +109,43 @@ Description: Highlights Changes ======= + 1.5 (2012-08-02) + ++++++++++++++++ + + * Added ``urllib3.add_stderr_logger()`` for quickly enabling STDERR debug + logging in urllib3. + + * Native full URL parsing (including auth, path, query, fragment) available in + ``urllib3.util.parse_url(url)``. + + * Built-in redirect will switch method to 'GET' if status code is 303. + (Issue #11) + + * ``urllib3.PoolManager`` strips the scheme and host before sending the request + uri. (Issue #8) + + * New ``urllib3.exceptions.DecodeError`` exception for when automatic decoding, + based on the Content-Type header, fails. + + * Fixed bug with pool depletion and leaking connections (Issue #76). Added + explicit connection closing on pool eviction. Added + ``urllib3.PoolManager.clear()``. + + * 99% -> 100% unit test coverage. + + + 1.4 (2012-06-16) + ++++++++++++++++ + + * Minor AppEngine-related fixes. + + * Switched from ``mimetools.choose_boundary`` to ``uuid.uuid4()``. + + * Improved url parsing. (Issue #73) + + * IPv6 url support. (Issue #72) + + 1.3 (2012-03-25) ++++++++++++++++ diff --git a/urllib3.egg-info/SOURCES.txt b/urllib3.egg-info/SOURCES.txt index d79710b..3155626 100644 --- a/urllib3.egg-info/SOURCES.txt +++ b/urllib3.egg-info/SOURCES.txt @@ -10,13 +10,12 @@ dummyserver/__init__.py dummyserver/handlers.py dummyserver/server.py dummyserver/testcase.py -test/__init__.py -test/benchmark.py test/test_collections.py test/test_connectionpool.py test/test_filepost.py test/test_poolmanager.py test/test_response.py +test/test_util.py urllib3/__init__.py urllib3/_collections.py urllib3/connectionpool.py @@ -30,9 +29,7 @@ urllib3.egg-info/PKG-INFO urllib3.egg-info/SOURCES.txt urllib3.egg-info/dependency_links.txt urllib3.egg-info/top_level.txt -urllib3/contrib/__init__.py -urllib3/contrib/ntlmpool.py urllib3/packages/__init__.py +urllib3/packages/ordered_dict.py urllib3/packages/six.py -urllib3/packages/mimetools_choose_boundary/__init__.py urllib3/packages/ssl_match_hostname/__init__.py \ No newline at end of file diff --git a/urllib3/__init__.py b/urllib3/__init__.py index 2d6fece..b552543 100644 --- a/urllib3/__init__.py +++ b/urllib3/__init__.py @@ -10,7 +10,7 @@ urllib3 - Thread-safe connection pooling and re-using. __author__ = 'Andrey Petrov (andrey.petrov@shazow.net)' __license__ = 'MIT' -__version__ = '1.3' +__version__ = '1.5' from .connectionpool import ( @@ -28,7 +28,7 @@ from .util import make_headers, get_host # Set default logging handler to avoid "No handler found" warnings. 
import logging -try: +try: # Python 2.7+ from logging import NullHandler except ImportError: class NullHandler(logging.Handler): @@ -37,6 +37,22 @@ except ImportError: logging.getLogger(__name__).addHandler(NullHandler()) +def add_stderr_logger(level=logging.DEBUG): + """ + Helper for quickly adding a StreamHandler to the logger. Useful for + debugging. + + Returns the handler after adding it. + """ + # This method needs to be in this __init__.py to get the __name__ correct + # even if urllib3 is vendored within another package. + logger = logging.getLogger(__name__) + handler = logging.StreamHandler() + handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s')) + logger.addHandler(handler) + logger.setLevel(level) + logger.debug('Added an stderr logging handler to logger: %s' % __name__) + return handler + # ... Clean up. -del logging del NullHandler diff --git a/urllib3/_collections.py b/urllib3/_collections.py index 3cef081..a052b1d 100644 --- a/urllib3/_collections.py +++ b/urllib3/_collections.py @@ -4,128 +4,91 @@ # This module is part of urllib3 and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php -from collections import deque +from collections import MutableMapping +from threading import Lock -from threading import RLock +try: # Python 2.7+ + from collections import OrderedDict +except ImportError: + from .packages.ordered_dict import OrderedDict -__all__ = ['RecentlyUsedContainer'] +__all__ = ['RecentlyUsedContainer'] -class AccessEntry(object): - __slots__ = ('key', 'is_valid') - def __init__(self, key, is_valid=True): - self.key = key - self.is_valid = is_valid +_Null = object() -class RecentlyUsedContainer(dict): - """ - Provides a dict-like that maintains up to ``maxsize`` keys while throwing - away the least-recently-used keys beyond ``maxsize``. +class RecentlyUsedContainer(MutableMapping): """ + Provides a thread-safe dict-like container which maintains up to + ``maxsize`` keys while throwing away the least-recently-used keys beyond + ``maxsize``. - # If len(self.access_log) exceeds self._maxsize * CLEANUP_FACTOR, then we - # will attempt to cleanup the invalidated entries in the access_log - # datastructure during the next 'get' operation. - CLEANUP_FACTOR = 10 - - def __init__(self, maxsize=10): - self._maxsize = maxsize - - self._container = {} - - # We use a deque to to store our keys ordered by the last access. - self.access_log = deque() - self.access_log_lock = RLock() - - # We look up the access log entry by the key to invalidate it so we can - # insert a new authorative entry at the head without having to dig and - # find the old entry for removal immediately. - self.access_lookup = {} - - # Trigger a heap cleanup when we get past this size - self.access_log_limit = maxsize * self.CLEANUP_FACTOR - - def _invalidate_entry(self, key): - "If exists: Invalidate old entry and return it." - old_entry = self.access_lookup.get(key) - if old_entry: - old_entry.is_valid = False + :param maxsize: + Maximum number of recent elements to retain. - return old_entry - - def _push_entry(self, key): - "Push entry onto our access log, invalidate the old entry if exists." - self._invalidate_entry(key) - - new_entry = AccessEntry(key) - self.access_lookup[key] = new_entry - - self.access_log_lock.acquire() - self.access_log.appendleft(new_entry) - self.access_log_lock.release() - - def _prune_entries(self, num): - "Pop entries from our access log until we popped ``num`` valid ones." 
-        while num > 0:
-            self.access_log_lock.acquire()
-            p = self.access_log.pop()
-            self.access_log_lock.release()
-
-            if not p.is_valid:
-                continue # Invalidated entry, skip
-
-            dict.pop(self, p.key, None)
-            self.access_lookup.pop(p.key, None)
-            num -= 1
+    :param dispose_func:
+        Every time an item is evicted from the container,
+        ``dispose_func(value)`` is called.
+    """
 
-    def _prune_invalidated_entries(self):
-        "Rebuild our access_log without the invalidated entries."
-        self.access_log_lock.acquire()
-        self.access_log = deque(e for e in self.access_log if e.is_valid)
-        self.access_log_lock.release()
+    ContainerCls = OrderedDict
 
-    def _get_ordered_access_keys(self):
-        "Return ordered access keys for inspection. Used for testing."
-        self.access_log_lock.acquire()
-        r = [e.key for e in self.access_log if e.is_valid]
-        self.access_log_lock.release()
+    def __init__(self, maxsize=10, dispose_func=None):
+        self._maxsize = maxsize
+        self.dispose_func = dispose_func
 
-        return r
+        self._container = self.ContainerCls()
+        self._lock = Lock()
 
     def __getitem__(self, key):
-        item = dict.get(self, key)
+        # Re-insert the item, moving it to the end of the eviction line.
+        with self._lock:
+            item = self._container.pop(key)
+            self._container[key] = item
+            return item
+
+    def __setitem__(self, key, value):
+        evicted_value = _Null
+        with self._lock:
+            # Possibly evict the existing value of 'key'
+            evicted_value = self._container.get(key, _Null)
+            self._container[key] = value
+
+            # If we didn't evict an existing value, we might have to evict the
+            # least recently used item from the beginning of the container.
+            if len(self._container) > self._maxsize:
+                _key, evicted_value = self._container.popitem(last=False)
+
+        if self.dispose_func and evicted_value is not _Null:
+            self.dispose_func(evicted_value)
 
-        if not item:
-            raise KeyError(key)
+    def __delitem__(self, key):
+        with self._lock:
+            value = self._container.pop(key)
 
-        # Insert new entry with new high priority, also implicitly invalidates
-        # the old entry.
-        self._push_entry(key)
+        if self.dispose_func:
+            self.dispose_func(value)
 
-        if len(self.access_log) > self.access_log_limit:
-            # Heap is getting too big, try to clean up any tailing invalidated
-            # entries.
- self._prune_invalidated_entries() + def __len__(self): + with self._lock: + return len(self._container) - return item + def __iter__(self): + raise NotImplementedError('Iteration over this class is unlikely to be threadsafe.') - def __setitem__(self, key, item): - # Add item to our container and access log - dict.__setitem__(self, key, item) - self._push_entry(key) + def clear(self): + with self._lock: + # Copy pointers to all values, then wipe the mapping + # under Python 2, this copies the list of values twice :-| + values = list(self._container.values()) + self._container.clear() - # Discard invalid and excess entries - self._prune_entries(len(self) - self._maxsize) + if self.dispose_func: + for value in values: + self.dispose_func(value) - def __delitem__(self, key): - self._invalidate_entry(key) - self.access_lookup.pop(key, None) - dict.__delitem__(self, key) - - def get(self, key, default=None): - try: - return self[key] - except KeyError: - return default + def keys(self): + with self._lock: + return self._container.keys() diff --git a/urllib3/connectionpool.py b/urllib3/connectionpool.py index c3cb3b1..97da544 100644 --- a/urllib3/connectionpool.py +++ b/urllib3/connectionpool.py @@ -7,27 +7,27 @@ import logging import socket -from socket import error as SocketError, timeout as SocketTimeout +from socket import timeout as SocketTimeout -try: # Python 3 +try: # Python 3 from http.client import HTTPConnection, HTTPException from http.client import HTTP_PORT, HTTPS_PORT except ImportError: from httplib import HTTPConnection, HTTPException from httplib import HTTP_PORT, HTTPS_PORT -try: # Python 3 +try: # Python 3 from queue import LifoQueue, Empty, Full except ImportError: from Queue import LifoQueue, Empty, Full -try: # Compiled with SSL? +try: # Compiled with SSL? HTTPSConnection = object BaseSSLError = None ssl = None - try: # Python 3 + try: # Python 3 from http.client import HTTPSConnection except ImportError: from httplib import HTTPSConnection @@ -35,7 +35,7 @@ try: # Compiled with SSL? import ssl BaseSSLError = ssl.SSLError -except (ImportError, AttributeError): +except (ImportError, AttributeError): # Platform-specific: No SSL. pass @@ -43,6 +43,7 @@ from .request import RequestMethods from .response import HTTPResponse from .util import get_host, is_connection_dropped from .exceptions import ( + ClosedPoolError, EmptyPoolError, HostChangedError, MaxRetryError, @@ -206,10 +207,8 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): try: conn = self.pool.get(block=self.block, timeout=timeout) - # If this is a persistent connection, check if it got disconnected - if conn and is_connection_dropped(conn): - log.info("Resetting dropped connection: %s" % self.host) - conn.close() + except AttributeError: # self.pool is None + raise ClosedPoolError(self, "Pool is closed.") except Empty: if self.block: @@ -218,6 +217,11 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): "connections are allowed.") pass # Oh well, we'll create a new connection then + # If this is a persistent connection, check if it got disconnected + if conn and is_connection_dropped(conn): + log.info("Resetting dropped connection: %s" % self.host) + conn.close() + return conn or self._new_conn() def _put_conn(self, conn): @@ -228,17 +232,26 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): Connection object for the current host and port as returned by :meth:`._new_conn` or :meth:`._get_conn`. - If the pool is already full, the connection is discarded because we - exceeded maxsize. 
If connections are discarded frequently, then maxsize - should be increased. + If the pool is already full, the connection is closed and discarded + because we exceeded maxsize. If connections are discarded frequently, + then maxsize should be increased. + + If the pool is closed, then the connection will be closed and discarded. """ try: self.pool.put(conn, block=False) + return # Everything is dandy, done. + except AttributeError: + # self.pool is None. + pass except Full: # This should never happen if self.block == True log.warning("HttpConnectionPool is full, discarding connection: %s" % self.host) + # Connection never got put back into the pool, close it. + conn.close() + def _make_request(self, conn, method, url, timeout=_Default, **httplib_request_kw): """ @@ -258,21 +271,42 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): if sock: sock.settimeout(timeout) - httplib_response = conn.getresponse() - - log.debug("\"%s %s %s\" %s %s" % - (method, url, - conn._http_vsn_str, # pylint: disable-msg=W0212 - httplib_response.status, httplib_response.length)) + try: # Python 2.7+, use buffering of HTTP responses + httplib_response = conn.getresponse(buffering=True) + except TypeError: # Python 2.6 and older + httplib_response = conn.getresponse() + # AppEngine doesn't have a version attr. + http_version = getattr(conn, '_http_vsn_str', 'HTTP/?') + log.debug("\"%s %s %s\" %s %s" % (method, url, http_version, + httplib_response.status, + httplib_response.length)) return httplib_response + def close(self): + """ + Close all pooled connections and disable the pool. + """ + # Disable access to the pool + old_pool, self.pool = self.pool, None + + try: + while True: + conn = old_pool.get(block=False) + if conn: + conn.close() + + except Empty: + pass # Done. def is_same_host(self, url): """ Check if the given ``url`` is a member of the same host as this connection pool. """ + if url.startswith('/'): + return True + # TODO: Add optional support for socket.gethostbyname checking. scheme, host, port = get_host(url) @@ -280,8 +314,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): # Use explicit default port for comparison when none is given. port = port_by_scheme.get(scheme) - return (url.startswith('/') or - (scheme, host, port) == (self.scheme, self.host, self.port)) + return (scheme, host, port) == (self.scheme, self.host, self.port) def urlopen(self, method, url, body=None, headers=None, retries=3, redirect=True, assert_same_host=True, timeout=_Default, @@ -320,8 +353,8 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): Number of retries to allow before raising a MaxRetryError exception. :param redirect: - Automatically handle redirects (status codes 301, 302, 303, 307), - each redirect counts as a retry. + If True, automatically handle redirects (status codes 301, 302, + 303, 307). Each redirect counts as a retry. :param assert_same_host: If ``True``, will make sure that the host of the pool requests is @@ -374,7 +407,6 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): try: # Request a connection from the queue - # (Could raise SocketError: Bad file descriptor) conn = self._get_conn(timeout=pool_timeout) # Make the request on the httplib connection object @@ -417,29 +449,38 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): # Name mismatch raise SSLError(e) - except (HTTPException, SocketError) as e: + except HTTPException as e: # Connection broken, discard. It will be replaced next _get_conn(). 
conn = None # This is necessary so we can access e below err = e finally: - if conn and release_conn: - # Put the connection back to be reused + if release_conn: + # Put the connection back to be reused. If the connection is + # expired then it will be None, which will get replaced with a + # fresh connection during _get_conn. self._put_conn(conn) if not conn: + # Try again log.warn("Retrying (%d attempts remain) after connection " "broken by '%r': %s" % (retries, err, url)) return self.urlopen(method, url, body, headers, retries - 1, - redirect, assert_same_host) # Try again + redirect, assert_same_host, + timeout=timeout, pool_timeout=pool_timeout, + release_conn=release_conn, **response_kw) # Handle redirect? redirect_location = redirect and response.get_redirect_location() if redirect_location: + if response.status == 303: + method = 'GET' log.info("Redirecting %s -> %s" % (url, redirect_location)) return self.urlopen(method, redirect_location, body, headers, - retries - 1, redirect, assert_same_host) + retries - 1, redirect, assert_same_host, + timeout=timeout, pool_timeout=pool_timeout, + release_conn=release_conn, **response_kw) return response diff --git a/urllib3/contrib/__init__.py b/urllib3/contrib/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/urllib3/contrib/ntlmpool.py b/urllib3/contrib/ntlmpool.py deleted file mode 100644 index bb41fd1..0000000 --- a/urllib3/contrib/ntlmpool.py +++ /dev/null @@ -1,120 +0,0 @@ -# urllib3/contrib/ntlmpool.py -# Copyright 2008-2012 Andrey Petrov and contributors (see CONTRIBUTORS.txt) -# -# This module is part of urllib3 and is released under -# the MIT License: http://www.opensource.org/licenses/mit-license.php - -""" -NTLM authenticating pool, contributed by erikcederstran - -Issue #10, see: http://code.google.com/p/urllib3/issues/detail?id=10 -""" - -try: - from http.client import HTTPSConnection -except ImportError: - from httplib import HTTPSConnection -from logging import getLogger -from ntlm import ntlm - -from urllib3 import HTTPSConnectionPool - - -log = getLogger(__name__) - - -class NTLMConnectionPool(HTTPSConnectionPool): - """ - Implements an NTLM authentication version of an urllib3 connection pool - """ - - scheme = 'https' - - def __init__(self, user, pw, authurl, *args, **kwargs): - """ - authurl is a random URL on the server that is protected by NTLM. - user is the Windows user, probably in the DOMAIN\username format. - pw is the password for the user. - """ - super(NTLMConnectionPool, self).__init__(*args, **kwargs) - self.authurl = authurl - self.rawuser = user - user_parts = user.split('\\', 1) - self.domain = user_parts[0].upper() - self.user = user_parts[1] - self.pw = pw - - def _new_conn(self): - # Performs the NTLM handshake that secures the connection. The socket - # must be kept open while requests are performed. - self.num_connections += 1 - log.debug('Starting NTLM HTTPS connection no. 
%d: https://%s%s' % - (self.num_connections, self.host, self.authurl)) - - headers = {} - headers['Connection'] = 'Keep-Alive' - req_header = 'Authorization' - resp_header = 'www-authenticate' - - conn = HTTPSConnection(host=self.host, port=self.port) - - # Send negotiation message - headers[req_header] = ( - 'NTLM %s' % ntlm.create_NTLM_NEGOTIATE_MESSAGE(self.rawuser)) - log.debug('Request headers: %s' % headers) - conn.request('GET', self.authurl, None, headers) - res = conn.getresponse() - reshdr = dict(res.getheaders()) - log.debug('Response status: %s %s' % (res.status, res.reason)) - log.debug('Response headers: %s' % reshdr) - log.debug('Response data: %s [...]' % res.read(100)) - - # Remove the reference to the socket, so that it can not be closed by - # the response object (we want to keep the socket open) - res.fp = None - - # Server should respond with a challenge message - auth_header_values = reshdr[resp_header].split(', ') - auth_header_value = None - for s in auth_header_values: - if s[:5] == 'NTLM ': - auth_header_value = s[5:] - if auth_header_value is None: - raise Exception('Unexpected %s response header: %s' % - (resp_header, reshdr[resp_header])) - - # Send authentication message - ServerChallenge, NegotiateFlags = \ - ntlm.parse_NTLM_CHALLENGE_MESSAGE(auth_header_value) - auth_msg = ntlm.create_NTLM_AUTHENTICATE_MESSAGE(ServerChallenge, - self.user, - self.domain, - self.pw, - NegotiateFlags) - headers[req_header] = 'NTLM %s' % auth_msg - log.debug('Request headers: %s' % headers) - conn.request('GET', self.authurl, None, headers) - res = conn.getresponse() - log.debug('Response status: %s %s' % (res.status, res.reason)) - log.debug('Response headers: %s' % dict(res.getheaders())) - log.debug('Response data: %s [...]' % res.read()[:100]) - if res.status != 200: - if res.status == 401: - raise Exception('Server rejected request: wrong ' - 'username or password') - raise Exception('Wrong server response: %s %s' % - (res.status, res.reason)) - - res.fp = None - log.debug('Connection established') - return conn - - def urlopen(self, method, url, body=None, headers=None, retries=3, - redirect=True, assert_same_host=True): - if headers is None: - headers = {} - headers['Connection'] = 'Keep-Alive' - return super(NTLMConnectionPool, self).urlopen(method, url, body, - headers, retries, - redirect, - assert_same_host) diff --git a/urllib3/exceptions.py b/urllib3/exceptions.py index 15c9699..99ebb67 100644 --- a/urllib3/exceptions.py +++ b/urllib3/exceptions.py @@ -24,6 +24,11 @@ class SSLError(HTTPError): pass +class DecodeError(HTTPError): + "Raised when automatic decoding based on Content-Type fails." + pass + + ## Leaf Exceptions class MaxRetryError(PoolError): @@ -57,6 +62,11 @@ class EmptyPoolError(PoolError): pass +class ClosedPoolError(PoolError): + "Raised when a request enters a pool after the pool has been closed." + pass + + class LocationParseError(ValueError, HTTPError): "Raised when get_host or similar fails to parse the URL input." 
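The new ClosedPoolError pairs with the HTTPConnectionPool.close() method added earlier in this patch: once a pool is closed, _get_conn() finds self.pool set to None and rejects the request before any socket work happens. A minimal sketch of how this surfaces (assuming the imported 1.5 tree is on the import path; no server is needed, since the error is raised before any connection attempt)::

    from urllib3 import HTTPConnectionPool
    from urllib3.exceptions import ClosedPoolError

    pool = HTTPConnectionPool('localhost')
    pool.close()                   # drains the queue and sets self.pool = None
    try:
        pool.request('GET', '/')   # _get_conn() raises before any I/O
    except ClosedPoolError:
        print('request rejected: pool was closed')
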
diff --git a/urllib3/filepost.py b/urllib3/filepost.py
index 344a103..e679b93 100644
--- a/urllib3/filepost.py
+++ b/urllib3/filepost.py
@@ -7,11 +7,7 @@
 import codecs
 import mimetypes
 
-try:
-    from mimetools import choose_boundary
-except ImportError:
-    from .packages.mimetools_choose_boundary import choose_boundary
-
+from uuid import uuid4
 from io import BytesIO
 
 from .packages import six
@@ -20,6 +16,13 @@ from .packages.six import b
 writer = codecs.lookup('utf-8')[3]
 
 
+def choose_boundary():
+    """
+    Our embarrassingly-simple replacement for mimetools.choose_boundary.
+    """
+    return uuid4().hex
+
+
 def get_content_type(filename):
     return mimetypes.guess_type(filename)[0] or 'application/octet-stream'
 
diff --git a/urllib3/packages/mimetools_choose_boundary/__init__.py b/urllib3/packages/mimetools_choose_boundary/__init__.py
deleted file mode 100644
index a0109ab..0000000
--- a/urllib3/packages/mimetools_choose_boundary/__init__.py
+++ /dev/null
@@ -1,47 +0,0 @@
-"""The function mimetools.choose_boundary() from Python 2.7, which seems to
-have disappeared in Python 3 (although email.generator._make_boundary() might
-work as a replacement?).
-
-Tweaked to use lock from threading rather than thread.
-"""
-import os
-from threading import Lock
-_counter_lock = Lock()
-
-_counter = 0
-def _get_next_counter():
-    global _counter
-    with _counter_lock:
-        _counter += 1
-        return _counter
-
-_prefix = None
-
-def choose_boundary():
-    """Return a string usable as a multipart boundary.
-
-    The string chosen is unique within a single program run, and
-    incorporates the user id (if available), process id (if available),
-    and current time. So it's very unlikely the returned string appears
-    in message text, but there's no guarantee.
-
-    The boundary contains dots so you have to quote it in the header."""
-
-    global _prefix
-    import time
-    if _prefix is None:
-        import socket
-        try:
-            hostid = socket.gethostbyname(socket.gethostname())
-        except socket.gaierror:
-            hostid = '127.0.0.1'
-        try:
-            uid = repr(os.getuid())
-        except AttributeError:
-            uid = '1'
-        try:
-            pid = repr(os.getpid())
-        except AttributeError:
-            pid = '1'
-        _prefix = hostid + '.' + uid + '.' + pid
-    return "%s.%.3f.%d" % (_prefix, time.time(), _get_next_counter())
diff --git a/urllib3/packages/ordered_dict.py b/urllib3/packages/ordered_dict.py
new file mode 100644
index 0000000..7f8ee15
--- /dev/null
+++ b/urllib3/packages/ordered_dict.py
@@ -0,0 +1,260 @@
+# Backport of OrderedDict() class that runs on Python 2.4, 2.5, 2.6, 2.7 and pypy.
+# Passes Python2.7's test suite and incorporates all the latest updates.
+# Copyright 2009 Raymond Hettinger, released under the MIT License.
+# http://code.activestate.com/recipes/576693/
+
+try:
+    from thread import get_ident as _get_ident
+except ImportError:
+    from dummy_thread import get_ident as _get_ident
+
+try:
+    from _abcoll import KeysView, ValuesView, ItemsView
+except ImportError:
+    pass
+
+
+class OrderedDict(dict):
+    'Dictionary that remembers insertion order'
+    # An inherited dict maps keys to values.
+    # The inherited dict provides __getitem__, __len__, __contains__, and get.
+    # The remaining methods are order-aware.
+    # Big-O running times for all methods are the same as for regular dictionaries.
+
+    # The internal self.__map dictionary maps keys to links in a doubly linked list.
+    # The circular doubly linked list starts and ends with a sentinel element.
+    # The sentinel element never gets deleted (this simplifies the algorithm).
+ # Each link is stored as a list of length three: [PREV, NEXT, KEY]. + + def __init__(self, *args, **kwds): + '''Initialize an ordered dictionary. Signature is the same as for + regular dictionaries, but keyword arguments are not recommended + because their insertion order is arbitrary. + + ''' + if len(args) > 1: + raise TypeError('expected at most 1 arguments, got %d' % len(args)) + try: + self.__root + except AttributeError: + self.__root = root = [] # sentinel node + root[:] = [root, root, None] + self.__map = {} + self.__update(*args, **kwds) + + def __setitem__(self, key, value, dict_setitem=dict.__setitem__): + 'od.__setitem__(i, y) <==> od[i]=y' + # Setting a new item creates a new link which goes at the end of the linked + # list, and the inherited dictionary is updated with the new key/value pair. + if key not in self: + root = self.__root + last = root[0] + last[1] = root[0] = self.__map[key] = [last, root, key] + dict_setitem(self, key, value) + + def __delitem__(self, key, dict_delitem=dict.__delitem__): + 'od.__delitem__(y) <==> del od[y]' + # Deleting an existing item uses self.__map to find the link which is + # then removed by updating the links in the predecessor and successor nodes. + dict_delitem(self, key) + link_prev, link_next, key = self.__map.pop(key) + link_prev[1] = link_next + link_next[0] = link_prev + + def __iter__(self): + 'od.__iter__() <==> iter(od)' + root = self.__root + curr = root[1] + while curr is not root: + yield curr[2] + curr = curr[1] + + def __reversed__(self): + 'od.__reversed__() <==> reversed(od)' + root = self.__root + curr = root[0] + while curr is not root: + yield curr[2] + curr = curr[0] + + def clear(self): + 'od.clear() -> None. Remove all items from od.' + try: + for node in self.__map.itervalues(): + del node[:] + root = self.__root + root[:] = [root, root, None] + self.__map.clear() + except AttributeError: + pass + dict.clear(self) + + def popitem(self, last=True): + '''od.popitem() -> (k, v), return and remove a (key, value) pair. + Pairs are returned in LIFO order if last is true or FIFO order if false. + + ''' + if not self: + raise KeyError('dictionary is empty') + root = self.__root + if last: + link = root[0] + link_prev = link[0] + link_prev[1] = root + root[0] = link_prev + else: + link = root[1] + link_next = link[1] + root[1] = link_next + link_next[0] = root + key = link[2] + del self.__map[key] + value = dict.pop(self, key) + return key, value + + # -- the following methods do not depend on the internal structure -- + + def keys(self): + 'od.keys() -> list of keys in od' + return list(self) + + def values(self): + 'od.values() -> list of values in od' + return [self[key] for key in self] + + def items(self): + 'od.items() -> list of (key, value) pairs in od' + return [(key, self[key]) for key in self] + + def iterkeys(self): + 'od.iterkeys() -> an iterator over the keys in od' + return iter(self) + + def itervalues(self): + 'od.itervalues -> an iterator over the values in od' + for k in self: + yield self[k] + + def iteritems(self): + 'od.iteritems -> an iterator over the (key, value) items in od' + for k in self: + yield (k, self[k]) + + def update(*args, **kwds): + '''od.update(E, **F) -> None. Update od from dict/iterable E and F. 
+ + If E is a dict instance, does: for k in E: od[k] = E[k] + If E has a .keys() method, does: for k in E.keys(): od[k] = E[k] + Or if E is an iterable of items, does: for k, v in E: od[k] = v + In either case, this is followed by: for k, v in F.items(): od[k] = v + + ''' + if len(args) > 2: + raise TypeError('update() takes at most 2 positional ' + 'arguments (%d given)' % (len(args),)) + elif not args: + raise TypeError('update() takes at least 1 argument (0 given)') + self = args[0] + # Make progressively weaker assumptions about "other" + other = () + if len(args) == 2: + other = args[1] + if isinstance(other, dict): + for key in other: + self[key] = other[key] + elif hasattr(other, 'keys'): + for key in other.keys(): + self[key] = other[key] + else: + for key, value in other: + self[key] = value + for key, value in kwds.items(): + self[key] = value + + __update = update # let subclasses override update without breaking __init__ + + __marker = object() + + def pop(self, key, default=__marker): + '''od.pop(k[,d]) -> v, remove specified key and return the corresponding value. + If key is not found, d is returned if given, otherwise KeyError is raised. + + ''' + if key in self: + result = self[key] + del self[key] + return result + if default is self.__marker: + raise KeyError(key) + return default + + def setdefault(self, key, default=None): + 'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od' + if key in self: + return self[key] + self[key] = default + return default + + def __repr__(self, _repr_running={}): + 'od.__repr__() <==> repr(od)' + call_key = id(self), _get_ident() + if call_key in _repr_running: + return '...' + _repr_running[call_key] = 1 + try: + if not self: + return '%s()' % (self.__class__.__name__,) + return '%s(%r)' % (self.__class__.__name__, self.items()) + finally: + del _repr_running[call_key] + + def __reduce__(self): + 'Return state information for pickling' + items = [[k, self[k]] for k in self] + inst_dict = vars(self).copy() + for k in vars(OrderedDict()): + inst_dict.pop(k, None) + if inst_dict: + return (self.__class__, (items,), inst_dict) + return self.__class__, (items,) + + def copy(self): + 'od.copy() -> a shallow copy of od' + return self.__class__(self) + + @classmethod + def fromkeys(cls, iterable, value=None): + '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S + and values equal to v (which defaults to None). + + ''' + d = cls() + for key in iterable: + d[key] = value + return d + + def __eq__(self, other): + '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive + while comparison to a regular mapping is order-insensitive. 
+ + ''' + if isinstance(other, OrderedDict): + return len(self)==len(other) and self.items() == other.items() + return dict.__eq__(self, other) + + def __ne__(self, other): + return not self == other + + # -- the following methods are only used in Python 2.7 -- + + def viewkeys(self): + "od.viewkeys() -> a set-like object providing a view on od's keys" + return KeysView(self) + + def viewvalues(self): + "od.viewvalues() -> an object providing a view on od's values" + return ValuesView(self) + + def viewitems(self): + "od.viewitems() -> a set-like object providing a view on od's items" + return ItemsView(self) diff --git a/urllib3/poolmanager.py b/urllib3/poolmanager.py index 310ea21..8f5b54c 100644 --- a/urllib3/poolmanager.py +++ b/urllib3/poolmanager.py @@ -8,9 +8,9 @@ import logging from ._collections import RecentlyUsedContainer from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool -from .connectionpool import get_host, connection_from_url, port_by_scheme -from .exceptions import HostChangedError +from .connectionpool import connection_from_url, port_by_scheme from .request import RequestMethods +from .util import parse_url __all__ = ['PoolManager', 'ProxyManager', 'proxy_from_url'] @@ -48,19 +48,29 @@ class PoolManager(RequestMethods): """ - # TODO: Make sure there are no memory leaks here. - def __init__(self, num_pools=10, **connection_pool_kw): self.connection_pool_kw = connection_pool_kw - self.pools = RecentlyUsedContainer(num_pools) + self.pools = RecentlyUsedContainer(num_pools, + dispose_func=lambda p: p.close()) + + def clear(self): + """ + Empty our store of pools and direct them all to close. + + This will not affect in-flight connections, but they will not be + re-used after completion. + """ + self.pools.clear() - def connection_from_host(self, host, port=80, scheme='http'): + def connection_from_host(self, host, port=None, scheme='http'): """ Get a :class:`ConnectionPool` based on the host, port, and scheme. - Note that an appropriate ``port`` value is required here to normalize - connection pools in our container most effectively. + If ``port`` isn't given, it will be derived from the ``scheme`` using + ``urllib3.connectionpool.port_by_scheme``. """ + port = port or port_by_scheme.get(scheme, 80) + pool_key = (scheme, host, port) # If the scheme, host, or port doesn't match existing open connections, @@ -86,26 +96,36 @@ class PoolManager(RequestMethods): Additional parameters are taken from the :class:`.PoolManager` constructor. """ - scheme, host, port = get_host(url) - - port = port or port_by_scheme.get(scheme, 80) - - return self.connection_from_host(host, port=port, scheme=scheme) + u = parse_url(url) + return self.connection_from_host(u.host, port=u.port, scheme=u.scheme) - def urlopen(self, method, url, **kw): + def urlopen(self, method, url, redirect=True, **kw): """ - Same as :meth:`urllib3.connectionpool.HTTPConnectionPool.urlopen`. + Same as :meth:`urllib3.connectionpool.HTTPConnectionPool.urlopen` + with custom cross-host redirect logic and only sends the request-uri + portion of the ``url``. - ``url`` must be absolute, such that an appropriate + The given ``url`` parameter must be absolute, such that an appropriate :class:`urllib3.connectionpool.ConnectionPool` can be chosen for it. 
""" - conn = self.connection_from_url(url) - try: - return conn.urlopen(method, url, **kw) + u = parse_url(url) + conn = self.connection_from_host(u.host, port=u.port, scheme=u.scheme) + + kw['assert_same_host'] = False + kw['redirect'] = False + + response = conn.urlopen(method, u.request_uri, **kw) + + redirect_location = redirect and response.get_redirect_location() + if not redirect_location: + return response + + if response.status == 303: + method = 'GET' - except HostChangedError as e: - kw['retries'] = e.retries # Persist retries countdown - return self.urlopen(method, e.url, **kw) + log.info("Redirecting %s -> %s" % (url, redirect_location)) + kw['retries'] = kw.get('retries', 3) - 1 # Persist retries countdown + return self.urlopen(method, redirect_location, **kw) class ProxyManager(RequestMethods): diff --git a/urllib3/response.py b/urllib3/response.py index 5fab824..28537d3 100644 --- a/urllib3/response.py +++ b/urllib3/response.py @@ -10,7 +10,7 @@ import zlib from io import BytesIO -from .exceptions import HTTPError +from .exceptions import DecodeError from .packages.six import string_types as basestring @@ -148,9 +148,9 @@ class HTTPResponse(object): try: if decode_content and decoder: data = decoder(data) - except IOError: - raise HTTPError("Received response with content-encoding: %s, but " - "failed to decode it." % content_encoding) + except (IOError, zlib.error): + raise DecodeError("Received response with content-encoding: %s, but " + "failed to decode it." % content_encoding) if cache_content: self._body = data diff --git a/urllib3/util.py b/urllib3/util.py index 2684a2f..8ec990b 100644 --- a/urllib3/util.py +++ b/urllib3/util.py @@ -6,6 +6,8 @@ from base64 import b64encode +from collections import namedtuple +from socket import error as SocketError try: from select import poll, POLLIN @@ -20,6 +22,152 @@ from .packages import six from .exceptions import LocationParseError +class Url(namedtuple('Url', ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment'])): + """ + Datastructure for representing an HTTP URL. Used as a return value for + :func:`parse_url`. + """ + slots = () + + def __new__(cls, scheme=None, auth=None, host=None, port=None, path=None, query=None, fragment=None): + return super(Url, cls).__new__(cls, scheme, auth, host, port, path, query, fragment) + + @property + def hostname(self): + """For backwards-compatibility with urlparse. We're nice like that.""" + return self.host + + @property + def request_uri(self): + """Absolute path including the query string.""" + uri = self.path or '/' + + if self.query is not None: + uri += '?' + self.query + + return uri + + +def split_first(s, delims): + """ + Given a string and an iterable of delimiters, split on the first found + delimiter. Return two split parts and the matched delimiter. + + If not found, then the first part is the full input string. + + Example: :: + + >>> split_first('foo/bar?baz', '?/=') + ('foo', 'bar?baz', '/') + >>> split_first('foo/bar?baz', '123') + ('foo/bar?baz', '', None) + + Scales linearly with number of delims. Not ideal for large number of delims. + """ + min_idx = None + min_delim = None + for d in delims: + idx = s.find(d) + if idx < 0: + continue + + if min_idx is None or idx < min_idx: + min_idx = idx + min_delim = d + + if min_idx is None or min_idx < 0: + return s, '', None + + return s[:min_idx], s[min_idx+1:], min_delim + + +def parse_url(url): + """ + Given a url, return a parsed :class:`.Url` namedtuple. Best-effort is + performed to parse incomplete urls. 
Fields not provided will be None.
+
+    Partly backwards-compatible with :mod:`urlparse`.
+
+    Example: ::
+
+        >>> parse_url('http://google.com/mail/')
+        Url(scheme='http', host='google.com', port=None, path='/mail/', ...)
+        >>> parse_url('google.com:80')
+        Url(scheme=None, host='google.com', port=80, path=None, ...)
+        >>> parse_url('/foo?bar')
+        Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...)
+    """
+
+    # While this code has overlap with stdlib's urlparse, it is much
+    # simplified for our needs and less annoying.
+    # Additionally, this implementation does silly things to be optimal
+    # on CPython.
+
+    scheme = None
+    auth = None
+    host = None
+    port = None
+    path = None
+    fragment = None
+    query = None
+
+    # Scheme
+    if '://' in url:
+        scheme, url = url.split('://', 1)
+
+    # Find the earliest Authority Terminator
+    # (http://tools.ietf.org/html/rfc3986#section-3.2)
+    url, path_, delim = split_first(url, ['/', '?', '#'])
+
+    if delim:
+        # Reassemble the path
+        path = delim + path_
+
+    # Auth
+    if '@' in url:
+        auth, url = url.split('@', 1)
+
+    # IPv6
+    if url and url[0] == '[':
+        host, url = url[1:].split(']', 1)
+
+    # Port
+    if ':' in url:
+        _host, port = url.split(':', 1)
+
+        if not host:
+            host = _host
+
+        if not port.isdigit():
+            raise LocationParseError("Failed to parse: %s" % url)
+
+        port = int(port)
+
+    elif not host and url:
+        host = url
+
+    if not path:
+        return Url(scheme, auth, host, port, path, query, fragment)
+
+    # Fragment
+    if '#' in path:
+        path, fragment = path.split('#', 1)
+
+    # Query
+    if '?' in path:
+        path, query = path.split('?', 1)
+
+    return Url(scheme, auth, host, port, path, query, fragment)
+
+
+def get_host(url):
+    """
+    Deprecated. Use :func:`.parse_url` instead.
+    """
+    p = parse_url(url)
+    return p.scheme or 'http', p.hostname, p.port
+
+
 def make_headers(keep_alive=None, accept_encoding=None, user_agent=None,
                  basic_auth=None):
     """
@@ -72,60 +220,28 @@ def make_headers(keep_alive=None, accept_encoding=None, user_agent=None,
     return headers
 
 
-def get_host(url):
-    """
-    Given a url, return its scheme, host and port (None if it's not there).
-
-    For example: ::
-
-        >>> get_host('http://google.com/mail/')
-        ('http', 'google.com', None)
-        >>> get_host('google.com:80')
-        ('http', 'google.com', 80)
-    """
-
-    # This code is actually similar to urlparse.urlsplit, but much
-    # simplified for our needs.
-    port = None
-    scheme = 'http'
-
-    if '://' in url:
-        scheme, url = url.split('://', 1)
-    if '/' in url:
-        url, _path = url.split('/', 1)
-    if '@' in url:
-        _auth, url = url.split('@', 1)
-    if ':' in url:
-        url, port = url.split(':', 1)
-
-        if not port.isdigit():
-            raise LocationParseError("Failed to parse: %s" % url)
-
-        port = int(port)
-
-    return scheme, url, port
-
-
-
 def is_connection_dropped(conn):
     """
     Returns True if the connection is dropped and should be closed.
 
     :param conn:
-        ``HTTPConnection`` object.
+        :class:`httplib.HTTPConnection` object.
 
     Note: For platforms like AppEngine, this will always return ``False`` to
     let the platform handle connection recycling transparently for us.
     """
     sock = getattr(conn, 'sock', False)
-    if not sock: #Platform-specific: AppEngine
+    if not sock:  # Platform-specific: AppEngine
         return False
     if not poll:  # Platform-specific
-        if not select: #Platform-specific: AppEngine
+        if not select:  # Platform-specific: AppEngine
             return False
 
-        return select([sock], [], [], 0.0)[0]
+        try:
+            return select([sock], [], [], 0.0)[0]
+        except SocketError:
+            return True
 
     # This version is better on platforms that support it.
p = poll() -- cgit v1.2.3
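Since parse_url() in the final hunk is a pure function, the new URL handling can be spot-checked without a network. A small sketch against this imported 1.5 tree (the URL is illustrative; the expected values follow from the parsing steps above)::

    from urllib3.util import parse_url

    u = parse_url('http://user:pw@[2001:db8::1]:8080/path?q=1#frag')
    print(u.scheme)       # 'http'
    print(u.auth)         # 'user:pw'
    print(u.host)         # '2001:db8::1' -- brackets stripped by the IPv6 branch
    print(u.port)         # 8080
    print(u.request_uri)  # '/path?q=1' -- path plus query string

The request_uri property is what the reworked PoolManager.urlopen() now sends to the pool in place of the absolute URL.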