33 files changed, 1116 insertions, 170 deletions
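The changelog recorded below notes that ``cert_reqs`` and ``ssl_version`` now also accept plain strings such as "REQUIRED" or "SSLv23", resolved through the new ``urllib3.util.resolve_cert_reqs`` and ``resolve_ssl_version`` helpers added in this change set. A minimal sketch of that call style; the host name and CA-bundle path are hypothetical placeholders, not values taken from this diff::

    from urllib3 import HTTPSConnectionPool

    pool = HTTPSConnectionPool(
        'example.com', port=443,
        cert_reqs='REQUIRED',    # resolved to ssl.CERT_REQUIRED
        ssl_version='SSLv23',    # resolved to ssl.PROTOCOL_SSLv23
        ca_certs='/etc/ssl/certs/ca-bundle.pem',  # hypothetical CA bundle path
    )
    response = pool.request('GET', '/')
    print(response.status)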
diff --git a/CHANGES.rst b/CHANGES.rst index a0cbdb3..a2a0da8 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,66 @@ Changes ======= +1.6 (2013-04-25) +++++++++++++++++ + +* Contrib: Optional SNI support for Py2 using PyOpenSSL. (Issue #156) + +* ``ProxyManager`` automatically adds ``Host: ...`` header if not given. + +* Improved SSL-related code. ``cert_req`` now optionally takes a string like + "REQUIRED" or "NONE". Same with ``ssl_version`` takes strings like "SSLv23" + The string values reflect the suffix of the respective constant variable. + (Issue #130) + +* Vendored ``socksipy`` now based on Anorov's fork which handles unexpectedly + closed proxy connections and larger read buffers. (Issue #135) + +* Ensure the connection is closed if no data is received, fixes connection leak + on some platforms. (Issue #133) + +* Added SNI support for SSL/TLS connections on Py32+. (Issue #89) + +* Tests fixed to be compatible with Py26 again. (Issue #125) + +* Added ability to choose SSL version by passing an ``ssl.PROTOCOL_*`` constant + to the ``ssl_version`` parameter of ``HTTPSConnectionPool``. (Issue #109) + +* Allow an explicit content type to be specified when encoding file fields. + (Issue #126) + +* Exceptions are now pickleable, with tests. (Issue #101) + +* Fixed default headers not getting passed in some cases. (Issue #99) + +* Treat "content-encoding" header value as case-insensitive, per RFC 2616 + Section 3.5. (Issue #110) + +* "Connection Refused" SocketErrors will get retried rather than raised. + (Issue #92) + +* Updated vendored ``six``, no longer overrides the global ``six`` module + namespace. (Issue #113) + +* ``urllib3.exceptions.MaxRetryError`` contains a ``reason`` property holding + the exception that prompted the final retry. If ``reason is None`` then it + was due to a redirect. (Issue #92, #114) + +* Fixed ``PoolManager.urlopen()`` from not redirecting more than once. + (Issue #149) + +* Don't assume ``Content-Type: text/plain`` for multi-part encoding parameters + that are not files. (Issue #111) + +* Pass `strict` param down to ``httplib.HTTPConnection``. (Issue #122) + +* Added mechanism to verify SSL certificates by fingerprint (md5, sha1) or + against an arbitrary hostname (when connecting by IP or for misconfigured + servers). (Issue #140) + +* Streaming decompression support. (Issue #159) + + 1.5 (2012-08-02) ++++++++++++++++ diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt index 7dfbcaf..19f59ce 100644 --- a/CONTRIBUTORS.txt +++ b/CONTRIBUTORS.txt @@ -45,5 +45,20 @@ In chronological order: * Shivaram Lingamneni <slingamn@cs.stanford.edu> * Support for explicitly closing pooled connections +* hartator <hartator@gmail.com> + * Corrected multipart behavior for params + +* Thomas Weißschuh <thomas@t-8ch.de> + * Support for TLS SNI + * API unification of ssl_version/cert_reqs + * SSL fingerprint and alternative hostname verification + * Bugfixes in testsuite + +* Sune Kirkeby <mig@ibofobi.dk> + * Optional SNI-support for Python 2 via PyOpenSSL. + +* Marc Schlaich <marc.schlaich@gmail.com> + * Various bugfixes and test improvements. 
+ * [Your name or handle] <[email or website]> * [Brief summary of your changes] diff --git a/LICENSE.txt b/LICENSE.txt index f658ad6..31f0b6c 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,6 +1,6 @@ This is the MIT license: http://www.opensource.org/licenses/mit-license.php -Copyright 2008-2011 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software @@ -1,12 +1,20 @@ Metadata-Version: 1.1 Name: urllib3 -Version: 1.5 +Version: 1.6 Summary: HTTP library with thread-safe connection pooling, file post, and more. Home-page: http://urllib3.readthedocs.org/ Author: Andrey Petrov Author-email: andrey.petrov@shazow.net License: MIT -Description: Highlights +Description: ======= + urllib3 + ======= + + .. image:: https://travis-ci.org/shazow/urllib3.png?branch=master + :target: https://travis-ci.org/shazow/urllib3 + + + Highlights ========== - Re-use the same socket connection for multiple requests @@ -17,7 +25,7 @@ Description: Highlights - Supports gzip and deflate decoding. - Thread-safe and sanity-safe. - Works with AppEngine, gevent, and eventlib. - - Tested on Python 2.6+ and Python 3.2+, 100% unit test coverage. + - Tested on Python 2.6+ and Python 3.3+, 100% unit test coverage. - Small and easy to understand codebase perfect for extending and building upon. For a more comprehensive solution, have a look at `Requests <http://python-requests.org/>`_ which is also powered by urllib3. @@ -109,6 +117,66 @@ Description: Highlights Changes ======= + 1.6 (2013-04-25) + ++++++++++++++++ + + * Contrib: Optional SNI support for Py2 using PyOpenSSL. (Issue #156) + + * ``ProxyManager`` automatically adds ``Host: ...`` header if not given. + + * Improved SSL-related code. ``cert_req`` now optionally takes a string like + "REQUIRED" or "NONE". Same with ``ssl_version`` takes strings like "SSLv23" + The string values reflect the suffix of the respective constant variable. + (Issue #130) + + * Vendored ``socksipy`` now based on Anorov's fork which handles unexpectedly + closed proxy connections and larger read buffers. (Issue #135) + + * Ensure the connection is closed if no data is received, fixes connection leak + on some platforms. (Issue #133) + + * Added SNI support for SSL/TLS connections on Py32+. (Issue #89) + + * Tests fixed to be compatible with Py26 again. (Issue #125) + + * Added ability to choose SSL version by passing an ``ssl.PROTOCOL_*`` constant + to the ``ssl_version`` parameter of ``HTTPSConnectionPool``. (Issue #109) + + * Allow an explicit content type to be specified when encoding file fields. + (Issue #126) + + * Exceptions are now pickleable, with tests. (Issue #101) + + * Fixed default headers not getting passed in some cases. (Issue #99) + + * Treat "content-encoding" header value as case-insensitive, per RFC 2616 + Section 3.5. (Issue #110) + + * "Connection Refused" SocketErrors will get retried rather than raised. + (Issue #92) + + * Updated vendored ``six``, no longer overrides the global ``six`` module + namespace. (Issue #113) + + * ``urllib3.exceptions.MaxRetryError`` contains a ``reason`` property holding + the exception that prompted the final retry. If ``reason is None`` then it + was due to a redirect. (Issue #92, #114) + + * Fixed ``PoolManager.urlopen()`` from not redirecting more than once. 
+ (Issue #149) + + * Don't assume ``Content-Type: text/plain`` for multi-part encoding parameters + that are not files. (Issue #111) + + * Pass `strict` param down to ``httplib.HTTPConnection``. (Issue #122) + + * Added mechanism to verify SSL certificates by fingerprint (md5, sha1) or + against an arbitrary hostname (when connecting by IP or for misconfigured + servers). (Issue #140) + + * Streaming decompression support. (Issue #159) + + 1.5 (2012-08-02) ++++++++++++++++ @@ -1,3 +1,11 @@ +======= +urllib3 +======= + +.. image:: https://travis-ci.org/shazow/urllib3.png?branch=master + :target: https://travis-ci.org/shazow/urllib3 + + Highlights ========== @@ -9,7 +17,7 @@ Highlights - Supports gzip and deflate decoding. - Thread-safe and sanity-safe. - Works with AppEngine, gevent, and eventlib. -- Tested on Python 2.6+ and Python 3.2+, 100% unit test coverage. +- Tested on Python 2.6+ and Python 3.3+, 100% unit test coverage. - Small and easy to understand codebase perfect for extending and building upon. For a more comprehensive solution, have a look at `Requests <http://python-requests.org/>`_ which is also powered by urllib3. diff --git a/dummyserver/handlers.py b/dummyserver/handlers.py index ca809ad..ab48b53 100644 --- a/dummyserver/handlers.py +++ b/dummyserver/handlers.py @@ -1,6 +1,7 @@ from __future__ import print_function import gzip +import json import logging import sys import time @@ -120,7 +121,7 @@ class TestingApp(WSGIHandler): return Response(status='303', headers=headers) def keepalive(self, request): - if request.params.get('close', '0') == '1': + if request.params.get('close', b'0') == b'1': headers = [('Connection', 'close')] return Response('Closing', headers=headers) @@ -148,7 +149,9 @@ class TestingApp(WSGIHandler): if encoding == 'gzip': headers = [('Content-Encoding', 'gzip')] file_ = BytesIO() - gzip.GzipFile('', mode='w', fileobj=file_).write(data) + zipfile = gzip.GzipFile('', mode='w', fileobj=file_) + zipfile.write(data) + zipfile.close() data = file_.getvalue() elif encoding == 'deflate': headers = [('Content-Encoding', 'deflate')] @@ -161,5 +164,8 @@ class TestingApp(WSGIHandler): data = 'garbage' return Response(data, headers=headers) + def headers(self, request): + return Response(json.dumps(request.headers)) + def shutdown(self, request): sys.exit() diff --git a/dummyserver/server.py b/dummyserver/server.py index 6c0943c..9031664 100755 --- a/dummyserver/server.py +++ b/dummyserver/server.py @@ -60,6 +60,7 @@ class SocketServerThread(threading.Thread): self.ready_lock.release() self.socket_handler(sock) + sock.close() def run(self): self.server = self._start_server() diff --git a/dummyserver/testcase.py b/dummyserver/testcase.py index 518d739..73b8f2f 100644 --- a/dummyserver/testcase.py +++ b/dummyserver/testcase.py @@ -32,6 +32,11 @@ class SocketDummyServerTestCase(unittest.TestCase): # Lock gets released by thread above ready_lock.acquire() + @classmethod + def tearDownClass(cls): + if hasattr(cls, 'server_thread'): + cls.server_thread.join() + class HTTPDummyServerTestCase(unittest.TestCase): scheme = 'http' @@ -54,6 +59,7 @@ class HTTPDummyServerTestCase(unittest.TestCase): @classmethod def _stop_server(cls): cls.server_thread.stop() + cls.server_thread.join() @classmethod def setUpClass(cls): @@ -2,6 +2,7 @@ logging-clear-handlers = true with-coverage = true cover-package = urllib3 +cover-min-percentage = 100 [egg_info] tag_build = @@ -44,8 +44,9 @@ setup(name='urllib3', author_email='andrey.petrov@shazow.net', 
url='http://urllib3.readthedocs.org/', license='MIT', - packages=['urllib3', 'dummyserver', 'urllib3.packages', - 'urllib3.packages.ssl_match_hostname', + packages=['urllib3', 'dummyserver', + 'urllib3.packages', 'urllib3.packages.ssl_match_hostname', + 'urllib3.contrib', ], requires=requirements, tests_require=tests_requirements, diff --git a/test-requirements.txt b/test-requirements.txt index e2d1579..226c13d 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -1,2 +1,3 @@ nose -tornado==2.1.1 +tornado==2.4.1 +coverage diff --git a/test/test_collections.py b/test/test_collections.py index 098b31a..b44c58a 100644 --- a/test/test_collections.py +++ b/test/test_collections.py @@ -119,9 +119,7 @@ class TestLRUContainer(unittest.TestCase): def test_iter(self): d = Container() - with self.assertRaises(NotImplementedError): - for i in d: - self.fail("Iteration shouldn't be implemented.") + self.assertRaises(NotImplementedError, d.__iter__) if __name__ == '__main__': unittest.main() diff --git a/test/test_connectionpool.py b/test/test_connectionpool.py index afc3098..a7e104a 100644 --- a/test/test_connectionpool.py +++ b/test/test_connectionpool.py @@ -11,7 +11,7 @@ from urllib3.exceptions import ( TimeoutError, ) -from socket import timeout as SocketTimeout +from socket import error as SocketError, timeout as SocketTimeout from ssl import SSLError as BaseSSLError try: # Python 3 @@ -23,6 +23,10 @@ except ImportError: class TestConnectionPool(unittest.TestCase): + """ + Tests in this suite should exercise the ConnectionPool functionality + without actually making any network requests or connections. + """ def test_same_host(self): same_host = [ ('http://google.com/', '/'), @@ -86,6 +90,24 @@ class TestConnectionPool(unittest.TestCase): str(EmptyPoolError(HTTPConnectionPool(host='localhost'), "Test.")), "HTTPConnectionPool(host='localhost', port=None): Test.") + def test_retry_exception_str(self): + self.assertEqual( + str(MaxRetryError( + HTTPConnectionPool(host='localhost'), "Test.", None)), + "HTTPConnectionPool(host='localhost', port=None): " + "Max retries exceeded with url: Test. (Caused by redirect)") + + err = SocketError("Test") + + # using err.__class__ here, as socket.error is an alias for OSError + # since Py3.3 and gets printed as this + self.assertEqual( + str(MaxRetryError( + HTTPConnectionPool(host='localhost'), "Test.", err)), + "HTTPConnectionPool(host='localhost', port=None): " + "Max retries exceeded with url: Test. 
" + "(Caused by {0}: Test)".format(str(err.__class__))) + def test_pool_size(self): POOL_SIZE = 1 pool = HTTPConnectionPool(host='localhost', maxsize=POOL_SIZE, block=True) @@ -95,8 +117,7 @@ class TestConnectionPool(unittest.TestCase): def _test(exception, expect): pool._make_request = lambda *args, **kwargs: _raise(exception) - with self.assertRaises(expect): - pool.request('GET', '/') + self.assertRaises(expect, pool.request, 'GET', '/') self.assertEqual(pool.pool.qsize(), POOL_SIZE) @@ -111,15 +132,15 @@ class TestConnectionPool(unittest.TestCase): # MaxRetryError, not EmptyPoolError # See: https://github.com/shazow/urllib3/issues/76 pool._make_request = lambda *args, **kwargs: _raise(HTTPException) - with self.assertRaises(MaxRetryError): - pool.request('GET', '/', retries=1, pool_timeout=0.01) + self.assertRaises(MaxRetryError, pool.request, + 'GET', '/', retries=1, pool_timeout=0.01) self.assertEqual(pool.pool.qsize(), POOL_SIZE) def test_assert_same_host(self): c = connection_from_url('http://google.com:80') - with self.assertRaises(HostChangedError): - c.request('GET', 'http://yahoo.com:80', assert_same_host=True) + self.assertRaises(HostChangedError, c.request, + 'GET', 'http://yahoo.com:80', assert_same_host=True) def test_pool_close(self): pool = connection_from_url('http://google.com:80') @@ -136,16 +157,14 @@ class TestConnectionPool(unittest.TestCase): pool.close() self.assertEqual(pool.pool, None) - with self.assertRaises(ClosedPoolError): - pool._get_conn() + self.assertRaises(ClosedPoolError, pool._get_conn) pool._put_conn(conn3) - with self.assertRaises(ClosedPoolError): - pool._get_conn() + self.assertRaises(ClosedPoolError, pool._get_conn) + + self.assertRaises(Empty, old_pool_queue.get, block=False) - with self.assertRaises(Empty): - old_pool_queue.get(block=False) if __name__ == '__main__': diff --git a/test/test_exceptions.py b/test/test_exceptions.py new file mode 100644 index 0000000..3e02ca6 --- /dev/null +++ b/test/test_exceptions.py @@ -0,0 +1,19 @@ +import unittest +import pickle + +from urllib3.exceptions import HTTPError, MaxRetryError, LocationParseError +from urllib3.connectionpool import HTTPConnectionPool + + + +class TestPickle(unittest.TestCase): + + def test_exceptions(self): + assert pickle.dumps(HTTPError(None)) + assert pickle.dumps(MaxRetryError(None, None, None)) + assert pickle.dumps(LocationParseError(None)) + + def test_exceptions_with_objects(self): + assert pickle.dumps(HTTPError('foo')) + assert pickle.dumps(MaxRetryError(HTTPConnectionPool('localhost'), '/', None)) + assert pickle.dumps(LocationParseError('fake location')) diff --git a/test/test_filepost.py b/test/test_filepost.py index c251778..70ab100 100644 --- a/test/test_filepost.py +++ b/test/test_filepost.py @@ -52,19 +52,17 @@ class TestMultipartEncoding(unittest.TestCase): self.assertEqual(encoded, b'--' + b(BOUNDARY) + b'\r\n' b'Content-Disposition: form-data; name="k"\r\n' - b'Content-Type: text/plain\r\n' b'\r\n' b'v\r\n' b'--' + b(BOUNDARY) + b'\r\n' b'Content-Disposition: form-data; name="k2"\r\n' - b'Content-Type: text/plain\r\n' b'\r\n' b'v2\r\n' b'--' + b(BOUNDARY) + b'--\r\n' , fields) self.assertEqual(content_type, - b'multipart/form-data; boundary=' + b(BOUNDARY)) + 'multipart/form-data; boundary=' + str(BOUNDARY)) def test_filename(self): @@ -82,4 +80,40 @@ class TestMultipartEncoding(unittest.TestCase): ) self.assertEqual(content_type, - b'multipart/form-data; boundary=' + b(BOUNDARY)) + 'multipart/form-data; boundary=' + str(BOUNDARY)) + + + def test_textplain(self): 
+ fields = [('k', ('somefile.txt', b'v'))] + + encoded, content_type = encode_multipart_formdata(fields, boundary=BOUNDARY) + + self.assertEqual(encoded, + b'--' + b(BOUNDARY) + b'\r\n' + b'Content-Disposition: form-data; name="k"; filename="somefile.txt"\r\n' + b'Content-Type: text/plain\r\n' + b'\r\n' + b'v\r\n' + b'--' + b(BOUNDARY) + b'--\r\n' + ) + + self.assertEqual(content_type, + 'multipart/form-data; boundary=' + str(BOUNDARY)) + + + def test_explicit(self): + fields = [('k', ('somefile.txt', b'v', 'image/jpeg'))] + + encoded, content_type = encode_multipart_formdata(fields, boundary=BOUNDARY) + + self.assertEqual(encoded, + b'--' + b(BOUNDARY) + b'\r\n' + b'Content-Disposition: form-data; name="k"; filename="somefile.txt"\r\n' + b'Content-Type: image/jpeg\r\n' + b'\r\n' + b'v\r\n' + b'--' + b(BOUNDARY) + b'--\r\n' + ) + + self.assertEqual(content_type, + 'multipart/form-data; boundary=' + str(BOUNDARY)) diff --git a/test/test_poolmanager.py b/test/test_poolmanager.py index 273abf9..2faab94 100644 --- a/test/test_poolmanager.py +++ b/test/test_poolmanager.py @@ -54,13 +54,11 @@ class TestPoolManager(unittest.TestCase): p.clear() self.assertEqual(len(p.pools), 0) - with self.assertRaises(ClosedPoolError): - conn_pool._get_conn() + self.assertRaises(ClosedPoolError, conn_pool._get_conn) conn_pool._put_conn(conn) - with self.assertRaises(ClosedPoolError): - conn_pool._get_conn() + self.assertRaises(ClosedPoolError, conn_pool._get_conn) self.assertEqual(len(p.pools), 0) diff --git a/test/test_proxymanager.py b/test/test_proxymanager.py new file mode 100644 index 0000000..64c86e8 --- /dev/null +++ b/test/test_proxymanager.py @@ -0,0 +1,27 @@ +import unittest + +from urllib3.poolmanager import ProxyManager + + +class TestProxyManager(unittest.TestCase): + def test_proxy_headers(self): + p = ProxyManager(None) + url = 'http://pypi.python.org/test' + + # Verify default headers + default_headers = {'Accept': '*/*', + 'Host': 'pypi.python.org'} + headers = p._set_proxy_headers(url) + + self.assertEqual(headers, default_headers) + + # Verify default headers don't overwrite provided headers + provided_headers = {'Accept': 'application/json', + 'custom': 'header', + 'Host': 'test.python.org'} + headers = p._set_proxy_headers(url, provided_headers) + + self.assertEqual(headers, provided_headers) + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_response.py b/test/test_response.py index 964f677..199e379 100644 --- a/test/test_response.py +++ b/test/test_response.py @@ -63,6 +63,54 @@ class TestResponse(unittest.TestCase): self.assertEqual(r.data, b'foo') + def test_decode_deflate_case_insensitve(self): + import zlib + data = zlib.compress(b'foo') + + fp = BytesIO(data) + r = HTTPResponse(fp, headers={'content-encoding': 'DeFlAtE'}) + + self.assertEqual(r.data, b'foo') + + def test_chunked_decoding_deflate(self): + import zlib + data = zlib.compress(b'foo') + + fp = BytesIO(data) + r = HTTPResponse(fp, headers={'content-encoding': 'deflate'}, + preload_content=False) + + self.assertEqual(r.read(3), b'') + self.assertEqual(r.read(1), b'f') + self.assertEqual(r.read(2), b'oo') + + def test_chunked_decoding_deflate2(self): + import zlib + compress = zlib.compressobj(6, zlib.DEFLATED, -zlib.MAX_WBITS) + data = compress.compress(b'foo') + data += compress.flush() + + fp = BytesIO(data) + r = HTTPResponse(fp, headers={'content-encoding': 'deflate'}, + preload_content=False) + + self.assertEqual(r.read(1), b'') + self.assertEqual(r.read(1), b'f') + self.assertEqual(r.read(2), b'oo') 
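The chunked-decoding tests above exercise the new streaming decompression support (Issue #159): with ``preload_content=False`` a deflate- or gzip-encoded body is decoded incrementally as it is read, rather than in a single pass. A minimal standalone sketch of the same behaviour, built only from pieces that appear in this diff::

    import zlib
    from io import BytesIO

    from urllib3.response import HTTPResponse

    data = zlib.compress(b'foo')
    r = HTTPResponse(BytesIO(data),
                     headers={'content-encoding': 'deflate'},
                     preload_content=False)

    print(r.read(3))  # b''  -- not enough compressed input consumed yet
    print(r.read(1))  # b'f'
    print(r.read(2))  # b'oo'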
+ + def test_chunked_decoding_gzip(self): + import zlib + compress = zlib.compressobj(6, zlib.DEFLATED, 16 + zlib.MAX_WBITS) + data = compress.compress(b'foo') + data += compress.flush() + + fp = BytesIO(data) + r = HTTPResponse(fp, headers={'content-encoding': 'gzip'}, + preload_content=False) + + self.assertEqual(r.read(11), b'') + self.assertEqual(r.read(1), b'f') + self.assertEqual(r.read(2), b'oo') if __name__ == '__main__': unittest.main() diff --git a/urllib3.egg-info/PKG-INFO b/urllib3.egg-info/PKG-INFO index 4e79ea8..661e33b 100644 --- a/urllib3.egg-info/PKG-INFO +++ b/urllib3.egg-info/PKG-INFO @@ -1,12 +1,20 @@ Metadata-Version: 1.1 Name: urllib3 -Version: 1.5 +Version: 1.6 Summary: HTTP library with thread-safe connection pooling, file post, and more. Home-page: http://urllib3.readthedocs.org/ Author: Andrey Petrov Author-email: andrey.petrov@shazow.net License: MIT -Description: Highlights +Description: ======= + urllib3 + ======= + + .. image:: https://travis-ci.org/shazow/urllib3.png?branch=master + :target: https://travis-ci.org/shazow/urllib3 + + + Highlights ========== - Re-use the same socket connection for multiple requests @@ -17,7 +25,7 @@ Description: Highlights - Supports gzip and deflate decoding. - Thread-safe and sanity-safe. - Works with AppEngine, gevent, and eventlib. - - Tested on Python 2.6+ and Python 3.2+, 100% unit test coverage. + - Tested on Python 2.6+ and Python 3.3+, 100% unit test coverage. - Small and easy to understand codebase perfect for extending and building upon. For a more comprehensive solution, have a look at `Requests <http://python-requests.org/>`_ which is also powered by urllib3. @@ -109,6 +117,66 @@ Description: Highlights Changes ======= + 1.6 (2013-04-25) + ++++++++++++++++ + + * Contrib: Optional SNI support for Py2 using PyOpenSSL. (Issue #156) + + * ``ProxyManager`` automatically adds ``Host: ...`` header if not given. + + * Improved SSL-related code. ``cert_req`` now optionally takes a string like + "REQUIRED" or "NONE". Same with ``ssl_version`` takes strings like "SSLv23" + The string values reflect the suffix of the respective constant variable. + (Issue #130) + + * Vendored ``socksipy`` now based on Anorov's fork which handles unexpectedly + closed proxy connections and larger read buffers. (Issue #135) + + * Ensure the connection is closed if no data is received, fixes connection leak + on some platforms. (Issue #133) + + * Added SNI support for SSL/TLS connections on Py32+. (Issue #89) + + * Tests fixed to be compatible with Py26 again. (Issue #125) + + * Added ability to choose SSL version by passing an ``ssl.PROTOCOL_*`` constant + to the ``ssl_version`` parameter of ``HTTPSConnectionPool``. (Issue #109) + + * Allow an explicit content type to be specified when encoding file fields. + (Issue #126) + + * Exceptions are now pickleable, with tests. (Issue #101) + + * Fixed default headers not getting passed in some cases. (Issue #99) + + * Treat "content-encoding" header value as case-insensitive, per RFC 2616 + Section 3.5. (Issue #110) + + * "Connection Refused" SocketErrors will get retried rather than raised. + (Issue #92) + + * Updated vendored ``six``, no longer overrides the global ``six`` module + namespace. (Issue #113) + + * ``urllib3.exceptions.MaxRetryError`` contains a ``reason`` property holding + the exception that prompted the final retry. If ``reason is None`` then it + was due to a redirect. (Issue #92, #114) + + * Fixed ``PoolManager.urlopen()`` from not redirecting more than once. 
+ (Issue #149) + + * Don't assume ``Content-Type: text/plain`` for multi-part encoding parameters + that are not files. (Issue #111) + + * Pass `strict` param down to ``httplib.HTTPConnection``. (Issue #122) + + * Added mechanism to verify SSL certificates by fingerprint (md5, sha1) or + against an arbitrary hostname (when connecting by IP or for misconfigured + servers). (Issue #140) + + * Streaming decompression support. (Issue #159) + + 1.5 (2012-08-02) ++++++++++++++++ diff --git a/urllib3.egg-info/SOURCES.txt b/urllib3.egg-info/SOURCES.txt index 3155626..69ec475 100644 --- a/urllib3.egg-info/SOURCES.txt +++ b/urllib3.egg-info/SOURCES.txt @@ -12,8 +12,10 @@ dummyserver/server.py dummyserver/testcase.py test/test_collections.py test/test_connectionpool.py +test/test_exceptions.py test/test_filepost.py test/test_poolmanager.py +test/test_proxymanager.py test/test_response.py test/test_util.py urllib3/__init__.py @@ -29,6 +31,9 @@ urllib3.egg-info/PKG-INFO urllib3.egg-info/SOURCES.txt urllib3.egg-info/dependency_links.txt urllib3.egg-info/top_level.txt +urllib3/contrib/__init__.py +urllib3/contrib/ntlmpool.py +urllib3/contrib/pyopenssl.py urllib3/packages/__init__.py urllib3/packages/ordered_dict.py urllib3/packages/six.py diff --git a/urllib3/__init__.py b/urllib3/__init__.py index b552543..ebd43b3 100644 --- a/urllib3/__init__.py +++ b/urllib3/__init__.py @@ -1,5 +1,5 @@ # urllib3/__init__.py -# Copyright 2008-2012 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) # # This module is part of urllib3 and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php @@ -10,7 +10,7 @@ urllib3 - Thread-safe connection pooling and re-using. __author__ = 'Andrey Petrov (andrey.petrov@shazow.net)' __license__ = 'MIT' -__version__ = '1.5' +__version__ = '1.6' from .connectionpool import ( diff --git a/urllib3/_collections.py b/urllib3/_collections.py index a052b1d..b35a736 100644 --- a/urllib3/_collections.py +++ b/urllib3/_collections.py @@ -1,5 +1,5 @@ # urllib3/_collections.py -# Copyright 2008-2012 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) # # This module is part of urllib3 and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php diff --git a/urllib3/connectionpool.py b/urllib3/connectionpool.py index 97da544..73fa9ca 100644 --- a/urllib3/connectionpool.py +++ b/urllib3/connectionpool.py @@ -1,13 +1,15 @@ # urllib3/connectionpool.py -# Copyright 2008-2012 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) # # This module is part of urllib3 and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php import logging import socket +import errno -from socket import timeout as SocketTimeout +from socket import error as SocketError, timeout as SocketTimeout +from .util import resolve_cert_reqs, resolve_ssl_version, assert_fingerprint try: # Python 3 from http.client import HTTPConnection, HTTPException @@ -41,7 +43,7 @@ except (ImportError, AttributeError): # Platform-specific: No SSL. 
from .request import RequestMethods from .response import HTTPResponse -from .util import get_host, is_connection_dropped +from .util import get_host, is_connection_dropped, ssl_wrap_socket from .exceptions import ( ClosedPoolError, EmptyPoolError, @@ -76,32 +78,41 @@ class VerifiedHTTPSConnection(HTTPSConnection): """ cert_reqs = None ca_certs = None + ssl_version = None def set_cert(self, key_file=None, cert_file=None, - cert_reqs='CERT_NONE', ca_certs=None): - ssl_req_scheme = { - 'CERT_NONE': ssl.CERT_NONE, - 'CERT_OPTIONAL': ssl.CERT_OPTIONAL, - 'CERT_REQUIRED': ssl.CERT_REQUIRED - } + cert_reqs=None, ca_certs=None, + assert_hostname=None, assert_fingerprint=None): self.key_file = key_file self.cert_file = cert_file - self.cert_reqs = ssl_req_scheme.get(cert_reqs) or ssl.CERT_NONE + self.cert_reqs = cert_reqs self.ca_certs = ca_certs + self.assert_hostname = assert_hostname + self.assert_fingerprint = assert_fingerprint def connect(self): # Add certificate verification sock = socket.create_connection((self.host, self.port), self.timeout) + resolved_cert_reqs = resolve_cert_reqs(self.cert_reqs) + resolved_ssl_version = resolve_ssl_version(self.ssl_version) + # Wrap socket using verification with the root certs in # trusted_root_certs - self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, - cert_reqs=self.cert_reqs, - ca_certs=self.ca_certs) - if self.ca_certs: - match_hostname(self.sock.getpeercert(), self.host) - + self.sock = ssl_wrap_socket(sock, self.key_file, self.cert_file, + cert_reqs=resolved_cert_reqs, + ca_certs=self.ca_certs, + server_hostname=self.host, + ssl_version=resolved_ssl_version) + + if resolved_cert_reqs != ssl.CERT_NONE: + if self.assert_fingerprint: + assert_fingerprint(self.sock.getpeercert(binary_form=True), + self.assert_fingerprint) + else: + match_hostname(self.sock.getpeercert(), + self.assert_hostname or self.host) ## Pool objects @@ -166,13 +177,13 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): def __init__(self, host, port=None, strict=False, timeout=None, maxsize=1, block=False, headers=None): - super(HTTPConnectionPool, self).__init__(host, port) + ConnectionPool.__init__(self, host, port) + RequestMethods.__init__(self, headers) self.strict = strict self.timeout = timeout self.pool = self.QueueCls(maxsize) self.block = block - self.headers = headers or {} # Fill the queue up so that doing get() on it will block properly for _ in xrange(maxsize): @@ -189,7 +200,9 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): self.num_connections += 1 log.info("Starting new HTTP connection (%d): %s" % (self.num_connections, self.host)) - return HTTPConnection(host=self.host, port=self.port) + return HTTPConnection(host=self.host, + port=self.port, + strict=self.strict) def _get_conn(self, timeout=None): """ @@ -449,12 +462,15 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): # Name mismatch raise SSLError(e) - except HTTPException as e: + except (HTTPException, SocketError) as e: # Connection broken, discard. It will be replaced next _get_conn(). conn = None # This is necessary so we can access e below err = e + if retries == 0: + raise MaxRetryError(self, url, e) + finally: if release_conn: # Put the connection back to be reused. If the connection is @@ -491,11 +507,15 @@ class HTTPSConnectionPool(HTTPConnectionPool): When Python is compiled with the :mod:`ssl` module, then :class:`.VerifiedHTTPSConnection` is used, which *can* verify certificates, - instead of :class:httplib.HTTPSConnection`. 
+ instead of :class:`httplib.HTTPSConnection`. - The ``key_file``, ``cert_file``, ``cert_reqs``, and ``ca_certs`` parameters - are only used if :mod:`ssl` is available and are fed into - :meth:`ssl.wrap_socket` to upgrade the connection socket into an SSL socket. + :class:`.VerifiedHTTPSConnection` uses one of ``assert_fingerprint``, + ``assert_hostname`` and ``host`` in this order to verify connections. + + The ``key_file``, ``cert_file``, ``cert_reqs``, ``ca_certs`` and + ``ssl_version`` are only used if :mod:`ssl` is available and are fed into + :meth:`urllib3.util.ssl_wrap_socket` to upgrade the connection socket + into an SSL socket. """ scheme = 'https' @@ -503,16 +523,20 @@ class HTTPSConnectionPool(HTTPConnectionPool): def __init__(self, host, port=None, strict=False, timeout=None, maxsize=1, block=False, headers=None, - key_file=None, cert_file=None, - cert_reqs='CERT_NONE', ca_certs=None): + key_file=None, cert_file=None, cert_reqs=None, + ca_certs=None, ssl_version=None, + assert_hostname=None, assert_fingerprint=None): - super(HTTPSConnectionPool, self).__init__(host, port, - strict, timeout, maxsize, - block, headers) + HTTPConnectionPool.__init__(self, host, port, + strict, timeout, maxsize, + block, headers) self.key_file = key_file self.cert_file = cert_file self.cert_reqs = cert_reqs self.ca_certs = ca_certs + self.ssl_version = ssl_version + self.assert_hostname = assert_hostname + self.assert_fingerprint = assert_fingerprint def _new_conn(self): """ @@ -522,16 +546,25 @@ class HTTPSConnectionPool(HTTPConnectionPool): log.info("Starting new HTTPS connection (%d): %s" % (self.num_connections, self.host)) - if not ssl: # Platform-specific: Python compiled without +ssl + if not ssl: # Platform-specific: Python compiled without +ssl if not HTTPSConnection or HTTPSConnection is object: raise SSLError("Can't connect to HTTPS URL because the SSL " "module is not available.") - return HTTPSConnection(host=self.host, port=self.port) + return HTTPSConnection(host=self.host, + port=self.port, + strict=self.strict) - connection = VerifiedHTTPSConnection(host=self.host, port=self.port) + connection = VerifiedHTTPSConnection(host=self.host, + port=self.port, + strict=self.strict) connection.set_cert(key_file=self.key_file, cert_file=self.cert_file, - cert_reqs=self.cert_reqs, ca_certs=self.ca_certs) + cert_reqs=self.cert_reqs, ca_certs=self.ca_certs, + assert_hostname=self.assert_hostname, + assert_fingerprint=self.assert_fingerprint) + + connection.ssl_version = self.ssl_version + return connection diff --git a/urllib3/contrib/__init__.py b/urllib3/contrib/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/urllib3/contrib/__init__.py diff --git a/urllib3/contrib/ntlmpool.py b/urllib3/contrib/ntlmpool.py new file mode 100644 index 0000000..277ee0b --- /dev/null +++ b/urllib3/contrib/ntlmpool.py @@ -0,0 +1,120 @@ +# urllib3/contrib/ntlmpool.py +# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# +# This module is part of urllib3 and is released under +# the MIT License: http://www.opensource.org/licenses/mit-license.php + +""" +NTLM authenticating pool, contributed by erikcederstran + +Issue #10, see: http://code.google.com/p/urllib3/issues/detail?id=10 +""" + +try: + from http.client import HTTPSConnection +except ImportError: + from httplib import HTTPSConnection +from logging import getLogger +from ntlm import ntlm + +from urllib3 import HTTPSConnectionPool + + +log = getLogger(__name__) + + +class 
NTLMConnectionPool(HTTPSConnectionPool): + """ + Implements an NTLM authentication version of an urllib3 connection pool + """ + + scheme = 'https' + + def __init__(self, user, pw, authurl, *args, **kwargs): + """ + authurl is a random URL on the server that is protected by NTLM. + user is the Windows user, probably in the DOMAIN\username format. + pw is the password for the user. + """ + super(NTLMConnectionPool, self).__init__(*args, **kwargs) + self.authurl = authurl + self.rawuser = user + user_parts = user.split('\\', 1) + self.domain = user_parts[0].upper() + self.user = user_parts[1] + self.pw = pw + + def _new_conn(self): + # Performs the NTLM handshake that secures the connection. The socket + # must be kept open while requests are performed. + self.num_connections += 1 + log.debug('Starting NTLM HTTPS connection no. %d: https://%s%s' % + (self.num_connections, self.host, self.authurl)) + + headers = {} + headers['Connection'] = 'Keep-Alive' + req_header = 'Authorization' + resp_header = 'www-authenticate' + + conn = HTTPSConnection(host=self.host, port=self.port) + + # Send negotiation message + headers[req_header] = ( + 'NTLM %s' % ntlm.create_NTLM_NEGOTIATE_MESSAGE(self.rawuser)) + log.debug('Request headers: %s' % headers) + conn.request('GET', self.authurl, None, headers) + res = conn.getresponse() + reshdr = dict(res.getheaders()) + log.debug('Response status: %s %s' % (res.status, res.reason)) + log.debug('Response headers: %s' % reshdr) + log.debug('Response data: %s [...]' % res.read(100)) + + # Remove the reference to the socket, so that it can not be closed by + # the response object (we want to keep the socket open) + res.fp = None + + # Server should respond with a challenge message + auth_header_values = reshdr[resp_header].split(', ') + auth_header_value = None + for s in auth_header_values: + if s[:5] == 'NTLM ': + auth_header_value = s[5:] + if auth_header_value is None: + raise Exception('Unexpected %s response header: %s' % + (resp_header, reshdr[resp_header])) + + # Send authentication message + ServerChallenge, NegotiateFlags = \ + ntlm.parse_NTLM_CHALLENGE_MESSAGE(auth_header_value) + auth_msg = ntlm.create_NTLM_AUTHENTICATE_MESSAGE(ServerChallenge, + self.user, + self.domain, + self.pw, + NegotiateFlags) + headers[req_header] = 'NTLM %s' % auth_msg + log.debug('Request headers: %s' % headers) + conn.request('GET', self.authurl, None, headers) + res = conn.getresponse() + log.debug('Response status: %s %s' % (res.status, res.reason)) + log.debug('Response headers: %s' % dict(res.getheaders())) + log.debug('Response data: %s [...]' % res.read()[:100]) + if res.status != 200: + if res.status == 401: + raise Exception('Server rejected request: wrong ' + 'username or password') + raise Exception('Wrong server response: %s %s' % + (res.status, res.reason)) + + res.fp = None + log.debug('Connection established') + return conn + + def urlopen(self, method, url, body=None, headers=None, retries=3, + redirect=True, assert_same_host=True): + if headers is None: + headers = {} + headers['Connection'] = 'Keep-Alive' + return super(NTLMConnectionPool, self).urlopen(method, url, body, + headers, retries, + redirect, + assert_same_host) diff --git a/urllib3/contrib/pyopenssl.py b/urllib3/contrib/pyopenssl.py new file mode 100644 index 0000000..5c4c6d8 --- /dev/null +++ b/urllib3/contrib/pyopenssl.py @@ -0,0 +1,167 @@ +'''SSL with SNI-support for Python 2. 
+ +This needs the following packages installed: + +* pyOpenSSL (tested with 0.13) +* ndg-httpsclient (tested with 0.3.2) +* pyasn1 (tested with 0.1.6) + +To activate it call :func:`~urllib3.contrib.pyopenssl.inject_into_urllib3`. +This can be done in a ``sitecustomize`` module, or at any other time before +your application begins using ``urllib3``, like this:: + + try: + import urllib3.contrib.pyopenssl + urllib3.contrib.pyopenssl.inject_into_urllib3() + except ImportError: + pass + +Now you can use :mod:`urllib3` as you normally would, and it will support SNI +when the required modules are installed. +''' + +from ndg.httpsclient.ssl_peer_verification import (ServerSSLCertVerification, + SUBJ_ALT_NAME_SUPPORT) +from ndg.httpsclient.subj_alt_name import SubjectAltName +import OpenSSL.SSL +from pyasn1.codec.der import decoder as der_decoder +from socket import _fileobject +import ssl + +from .. import connectionpool +from .. import util + +__all__ = ['inject_into_urllib3', 'extract_from_urllib3'] + +# SNI only *really* works if we can read the subjectAltName of certificates. +HAS_SNI = SUBJ_ALT_NAME_SUPPORT + +# Map from urllib3 to PyOpenSSL compatible parameter-values. +_openssl_versions = { + ssl.PROTOCOL_SSLv23: OpenSSL.SSL.SSLv23_METHOD, + ssl.PROTOCOL_SSLv3: OpenSSL.SSL.SSLv3_METHOD, + ssl.PROTOCOL_TLSv1: OpenSSL.SSL.TLSv1_METHOD, +} +_openssl_verify = { + ssl.CERT_NONE: OpenSSL.SSL.VERIFY_NONE, + ssl.CERT_OPTIONAL: OpenSSL.SSL.VERIFY_PEER, + ssl.CERT_REQUIRED: OpenSSL.SSL.VERIFY_PEER + + OpenSSL.SSL.VERIFY_FAIL_IF_NO_PEER_CERT, +} + + +orig_util_HAS_SNI = util.HAS_SNI +orig_connectionpool_ssl_wrap_socket = connectionpool.ssl_wrap_socket + + +def inject_into_urllib3(): + 'Monkey-patch urllib3 with PyOpenSSL-backed SSL-support.' + + connectionpool.ssl_wrap_socket = ssl_wrap_socket + util.HAS_SNI = HAS_SNI + + +def extract_from_urllib3(): + 'Undo monkey-patching by :func:`inject_into_urllib3`.' + + connectionpool.ssl_wrap_socket = orig_connectionpool_ssl_wrap_socket + util.HAS_SNI = orig_util_HAS_SNI + + +### Note: This is a slightly bug-fixed version of same from ndg-httpsclient. 
+def get_subj_alt_name(peer_cert): + # Search through extensions + dns_name = [] + if not SUBJ_ALT_NAME_SUPPORT: + return dns_name + + general_names = SubjectAltName() + for i in range(peer_cert.get_extension_count()): + ext = peer_cert.get_extension(i) + ext_name = ext.get_short_name() + if ext_name != 'subjectAltName': + continue + + # PyOpenSSL returns extension data in ASN.1 encoded form + ext_dat = ext.get_data() + decoded_dat = der_decoder.decode(ext_dat, + asn1Spec=general_names) + + for name in decoded_dat: + if not isinstance(name, SubjectAltName): + continue + for entry in range(len(name)): + component = name.getComponentByPosition(entry) + if component.getName() != 'dNSName': + continue + dns_name.append(str(component.getComponent())) + + return dns_name + + +class WrappedSocket(object): + '''API-compatibility wrapper for Python OpenSSL's Connection-class.''' + + def __init__(self, connection, socket): + self.connection = connection + self.socket = socket + + def makefile(self, mode, bufsize=-1): + return _fileobject(self.connection, mode, bufsize) + + def settimeout(self, timeout): + return self.socket.settimeout(timeout) + + def sendall(self, data): + return self.connection.sendall(data) + + def getpeercert(self, binary_form=False): + x509 = self.connection.get_peer_certificate() + if not x509: + raise ssl.SSLError('') + + if binary_form: + return OpenSSL.crypto.dump_certificate( + OpenSSL.crypto.FILETYPE_ASN1, + x509) + + return { + 'subject': ( + (('commonName', x509.get_subject().CN),), + ), + 'subjectAltName': [ + ('DNS', value) + for value in get_subj_alt_name(x509) + ] + } + + +def _verify_callback(cnx, x509, err_no, err_depth, return_code): + return err_no == 0 + + +def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, + ca_certs=None, server_hostname=None, + ssl_version=None): + ctx = OpenSSL.SSL.Context(_openssl_versions[ssl_version]) + if certfile: + ctx.use_certificate_file(certfile) + if keyfile: + ctx.use_privatekey_file(keyfile) + if cert_reqs != ssl.CERT_NONE: + ctx.set_verify(_openssl_verify[cert_reqs], _verify_callback) + if ca_certs: + try: + ctx.load_verify_locations(ca_certs, None) + except OpenSSL.SSL.Error as e: + raise ssl.SSLError('bad ca_certs: %r' % ca_certs, e) + + cnx = OpenSSL.SSL.Connection(ctx, sock) + cnx.set_tlsext_host_name(server_hostname) + cnx.set_connect_state() + try: + cnx.do_handshake() + except OpenSSL.SSL.Error as e: + raise ssl.SSLError('bad handshake', e) + + return WrappedSocket(cnx, sock) diff --git a/urllib3/exceptions.py b/urllib3/exceptions.py index 99ebb67..8dd76af 100644 --- a/urllib3/exceptions.py +++ b/urllib3/exceptions.py @@ -1,5 +1,5 @@ # urllib3/exceptions.py -# Copyright 2008-2012 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) # # This module is part of urllib3 and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php @@ -18,6 +18,10 @@ class PoolError(HTTPError): self.pool = pool HTTPError.__init__(self, "%s: %s" % (pool, message)) + def __reduce__(self): + # For pickling purposes. + return self.__class__, (None, self.url) + class SSLError(HTTPError): "Raised when SSL certificate fails in an HTTPS connection." @@ -34,10 +38,16 @@ class DecodeError(HTTPError): class MaxRetryError(PoolError): "Raised when the maximum number of retries is exceeded." 
- def __init__(self, pool, url): + def __init__(self, pool, url, reason=None): + self.reason = reason + message = "Max retries exceeded with url: %s" % url - PoolError.__init__(self, pool, message) + if reason: + message += " (Caused by %s: %s)" % (type(reason), reason) + else: + message += " (Caused by redirect)" + PoolError.__init__(self, pool, message) self.url = url @@ -72,6 +82,6 @@ class LocationParseError(ValueError, HTTPError): def __init__(self, location): message = "Failed to parse: %s" % location - super(LocationParseError, self).__init__(self, message) + HTTPError.__init__(self, message) self.location = location diff --git a/urllib3/filepost.py b/urllib3/filepost.py index e679b93..526a740 100644 --- a/urllib3/filepost.py +++ b/urllib3/filepost.py @@ -1,5 +1,5 @@ # urllib3/filepost.py -# Copyright 2008-2012 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) # # This module is part of urllib3 and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php @@ -41,13 +41,16 @@ def iter_fields(fields): def encode_multipart_formdata(fields, boundary=None): """ - Encode a dictionary of ``fields`` using the multipart/form-data mime format. + Encode a dictionary of ``fields`` using the multipart/form-data MIME format. :param fields: - Dictionary of fields or list of (key, value) field tuples. The key is - treated as the field name, and the value as the body of the form-data - bytes. If the value is a tuple of two elements, then the first element - is treated as the filename of the form-data section. + Dictionary of fields or list of (key, value) or (key, value, MIME type) + field tuples. The key is treated as the field name, and the value as + the body of the form-data bytes. If the value is a tuple of two + elements, then the first element is treated as the filename of the + form-data section and a suitable MIME type is guessed based on the + filename. If the value is a tuple of three elements, then the third + element is treated as an explicit MIME type of the form-data section. Field names and filenames must be unicode. 
@@ -63,16 +66,20 @@ def encode_multipart_formdata(fields, boundary=None): body.write(b('--%s\r\n' % (boundary))) if isinstance(value, tuple): - filename, data = value + if len(value) == 3: + filename, data, content_type = value + else: + filename, data = value + content_type = get_content_type(filename) writer(body).write('Content-Disposition: form-data; name="%s"; ' 'filename="%s"\r\n' % (fieldname, filename)) body.write(b('Content-Type: %s\r\n\r\n' % - (get_content_type(filename)))) + (content_type,))) else: data = value writer(body).write('Content-Disposition: form-data; name="%s"\r\n' % (fieldname)) - body.write(b'Content-Type: text/plain\r\n\r\n') + body.write(b'\r\n') if isinstance(data, int): data = str(data) # Backwards compatibility @@ -86,6 +93,6 @@ def encode_multipart_formdata(fields, boundary=None): body.write(b('--%s--\r\n' % (boundary))) - content_type = b('multipart/form-data; boundary=%s' % boundary) + content_type = str('multipart/form-data; boundary=%s' % boundary) return body.getvalue(), content_type diff --git a/urllib3/packages/six.py b/urllib3/packages/six.py index a64f6fb..27d8011 100644 --- a/urllib3/packages/six.py +++ b/urllib3/packages/six.py @@ -24,7 +24,7 @@ import sys import types __author__ = "Benjamin Peterson <benjamin@python.org>" -__version__ = "1.1.0" +__version__ = "1.2.0" # Revision 41c74fef2ded # True if we are running on Python 3. @@ -45,19 +45,23 @@ else: text_type = unicode binary_type = str - # It's possible to have sizeof(long) != sizeof(Py_ssize_t). - class X(object): - def __len__(self): - return 1 << 31 - try: - len(X()) - except OverflowError: - # 32-bit + if sys.platform.startswith("java"): + # Jython always uses 32 bits. MAXSIZE = int((1 << 31) - 1) else: - # 64-bit - MAXSIZE = int((1 << 63) - 1) - del X + # It's possible to have sizeof(long) != sizeof(Py_ssize_t). 
+ class X(object): + def __len__(self): + return 1 << 31 + try: + len(X()) + except OverflowError: + # 32-bit + MAXSIZE = int((1 << 31) - 1) + else: + # 64-bit + MAXSIZE = int((1 << 63) - 1) + del X def _add_doc(func, doc): @@ -132,6 +136,7 @@ class _MovedItems(types.ModuleType): _moved_attributes = [ MovedAttribute("cStringIO", "cStringIO", "io", "StringIO"), MovedAttribute("filter", "itertools", "builtins", "ifilter", "filter"), + MovedAttribute("input", "__builtin__", "builtins", "raw_input", "input"), MovedAttribute("map", "itertools", "builtins", "imap", "map"), MovedAttribute("reload_module", "__builtin__", "imp", "reload"), MovedAttribute("reduce", "__builtin__", "functools"), @@ -178,7 +183,7 @@ for attr in _moved_attributes: setattr(_MovedItems, attr.name, attr) del attr -moves = sys.modules["six.moves"] = _MovedItems("moves") +moves = sys.modules[__name__ + ".moves"] = _MovedItems("moves") def add_move(move): @@ -219,12 +224,19 @@ else: _iteritems = "iteritems" +try: + advance_iterator = next +except NameError: + def advance_iterator(it): + return it.next() +next = advance_iterator + + if PY3: def get_unbound_function(unbound): return unbound - - advance_iterator = next + Iterator = object def callable(obj): return any("__call__" in klass.__dict__ for klass in type(obj).__mro__) @@ -232,9 +244,10 @@ else: def get_unbound_function(unbound): return unbound.im_func + class Iterator(object): - def advance_iterator(it): - return it.next() + def next(self): + return type(self).__next__(self) callable = callable _add_doc(get_unbound_function, @@ -249,15 +262,15 @@ get_function_defaults = operator.attrgetter(_func_defaults) def iterkeys(d): """Return an iterator over the keys of a dictionary.""" - return getattr(d, _iterkeys)() + return iter(getattr(d, _iterkeys)()) def itervalues(d): """Return an iterator over the values of a dictionary.""" - return getattr(d, _itervalues)() + return iter(getattr(d, _itervalues)()) def iteritems(d): """Return an iterator over the (key, value) pairs of a dictionary.""" - return getattr(d, _iteritems)() + return iter(getattr(d, _iteritems)()) if PY3: diff --git a/urllib3/poolmanager.py b/urllib3/poolmanager.py index 8f5b54c..ce0c248 100644 --- a/urllib3/poolmanager.py +++ b/urllib3/poolmanager.py @@ -1,5 +1,5 @@ # urllib3/poolmanager.py -# Copyright 2008-2012 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) # # This module is part of urllib3 and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php @@ -23,6 +23,9 @@ pool_classes_by_scheme = { log = logging.getLogger(__name__) +SSL_KEYWORDS = ('key_file', 'cert_file', 'cert_reqs', 'ca_certs', + 'ssl_version') + class PoolManager(RequestMethods): """ @@ -30,8 +33,12 @@ class PoolManager(RequestMethods): necessary connection pools for you. :param num_pools: - Number of connection pools to cache before discarding the least recently - used pool. + Number of connection pools to cache before discarding the least + recently used pool. + + :param headers: + Headers to include with all requests, unless other headers are given + explicitly. 
:param \**connection_pool_kw: Additional parameters are used to create fresh @@ -40,19 +47,37 @@ class PoolManager(RequestMethods): Example: :: >>> manager = PoolManager(num_pools=2) - >>> r = manager.urlopen("http://google.com/") - >>> r = manager.urlopen("http://google.com/mail") - >>> r = manager.urlopen("http://yahoo.com/") + >>> r = manager.request('GET', 'http://google.com/') + >>> r = manager.request('GET', 'http://google.com/mail') + >>> r = manager.request('GET', 'http://yahoo.com/') >>> len(manager.pools) 2 """ - def __init__(self, num_pools=10, **connection_pool_kw): + def __init__(self, num_pools=10, headers=None, **connection_pool_kw): + RequestMethods.__init__(self, headers) self.connection_pool_kw = connection_pool_kw self.pools = RecentlyUsedContainer(num_pools, dispose_func=lambda p: p.close()) + def _new_pool(self, scheme, host, port): + """ + Create a new :class:`ConnectionPool` based on host, port and scheme. + + This method is used to actually create the connection pools handed out + by :meth:`connection_from_url` and companion methods. It is intended + to be overridden for customization. + """ + pool_cls = pool_classes_by_scheme[scheme] + kwargs = self.connection_pool_kw + if scheme == 'http': + kwargs = self.connection_pool_kw.copy() + for kw in SSL_KEYWORDS: + kwargs.pop(kw, None) + + return pool_cls(host, port, **kwargs) + def clear(self): """ Empty our store of pools and direct them all to close. @@ -69,6 +94,7 @@ class PoolManager(RequestMethods): If ``port`` isn't given, it will be derived from the ``scheme`` using ``urllib3.connectionpool.port_by_scheme``. """ + scheme = scheme or 'http' port = port or port_by_scheme.get(scheme, 80) pool_key = (scheme, host, port) @@ -80,11 +106,8 @@ class PoolManager(RequestMethods): return pool # Make a fresh ConnectionPool of the desired type - pool_cls = pool_classes_by_scheme[scheme] - pool = pool_cls(host, port, **self.connection_pool_kw) - + pool = self._new_pool(scheme, host, port) self.pools[pool_key] = pool - return pool def connection_from_url(self, url): @@ -113,6 +136,8 @@ class PoolManager(RequestMethods): kw['assert_same_host'] = False kw['redirect'] = False + if 'headers' not in kw: + kw['headers'] = self.headers response = conn.urlopen(method, u.request_uri, **kw) @@ -124,32 +149,41 @@ class PoolManager(RequestMethods): method = 'GET' log.info("Redirecting %s -> %s" % (url, redirect_location)) - kw['retries'] = kw.get('retries', 3) - 1 # Persist retries countdown + kw['retries'] = kw.get('retries', 3) - 1 # Persist retries countdown + kw['redirect'] = redirect return self.urlopen(method, redirect_location, **kw) class ProxyManager(RequestMethods): """ Given a ConnectionPool to a proxy, the ProxyManager's ``urlopen`` method - will make requests to any url through the defined proxy. + will make requests to any url through the defined proxy. The ProxyManager + class will automatically set the 'Host' header if it is not provided. """ def __init__(self, proxy_pool): self.proxy_pool = proxy_pool - def _set_proxy_headers(self, headers=None): - headers = headers or {} + def _set_proxy_headers(self, url, headers=None): + """ + Sets headers needed by proxies: specifically, the Accept and Host + headers. Only sets headers not provided by the user. 
+ """ + headers_ = {'Accept': '*/*'} + + host = parse_url(url).host + if host: + headers_['Host'] = host - # Same headers are curl passes for --proxy1.0 - headers['Accept'] = '*/*' - headers['Proxy-Connection'] = 'Keep-Alive' + if headers: + headers_.update(headers) - return headers + return headers_ def urlopen(self, method, url, **kw): "Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute." kw['assert_same_host'] = False - kw['headers'] = self._set_proxy_headers(kw.get('headers')) + kw['headers'] = self._set_proxy_headers(url, headers=kw.get('headers')) return self.proxy_pool.urlopen(method, url, **kw) diff --git a/urllib3/request.py b/urllib3/request.py index 569ac96..bf0256e 100644 --- a/urllib3/request.py +++ b/urllib3/request.py @@ -1,5 +1,5 @@ # urllib3/request.py -# Copyright 2008-2012 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) # # This module is part of urllib3 and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php @@ -36,12 +36,20 @@ class RequestMethods(object): :meth:`.request` is for making any kind of request, it will look up the appropriate encoding format and use one of the above two methods to make the request. + + Initializer parameters: + + :param headers: + Headers to include with all requests, unless other headers are given + explicitly. """ _encode_url_methods = set(['DELETE', 'GET', 'HEAD', 'OPTIONS']) - _encode_body_methods = set(['PATCH', 'POST', 'PUT', 'TRACE']) + def __init__(self, headers=None): + self.headers = headers or {} + def urlopen(self, method, url, body=None, headers=None, encode_multipart=True, multipart_boundary=None, **kw): # Abstract @@ -97,13 +105,16 @@ class RequestMethods(object): such as with OAuth. Supports an optional ``fields`` parameter of key/value strings AND - key/filetuple. A filetuple is a (filename, data) tuple. For example: :: + key/filetuple. A filetuple is a (filename, data, MIME type) tuple where + the MIME type is optional. 
For example: :: fields = { 'foo': 'bar', 'fakefile': ('foofile.txt', 'contents of foofile'), 'realfile': ('barfile.txt', open('realfile').read()), - 'nonamefile': ('contents of nonamefile field'), + 'typedfile': ('bazfile.bin', open('bazfile').read(), + 'image/jpeg'), + 'nonamefile': 'contents of nonamefile field', } When uploading a file, providing a filename (the first parameter of the @@ -121,8 +132,11 @@ class RequestMethods(object): body, content_type = (urlencode(fields or {}), 'application/x-www-form-urlencoded') - headers = headers or {} - headers.update({'Content-Type': content_type}) + if headers is None: + headers = self.headers + + headers_ = {'Content-Type': content_type} + headers_.update(headers) - return self.urlopen(method, url, body=body, headers=headers, + return self.urlopen(method, url, body=body, headers=headers_, **urlopen_kw) diff --git a/urllib3/response.py b/urllib3/response.py index 28537d3..1685760 100644 --- a/urllib3/response.py +++ b/urllib3/response.py @@ -1,32 +1,51 @@ # urllib3/response.py -# Copyright 2008-2012 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) # # This module is part of urllib3 and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php -import gzip + import logging import zlib -from io import BytesIO - from .exceptions import DecodeError -from .packages.six import string_types as basestring +from .packages.six import string_types as basestring, binary_type log = logging.getLogger(__name__) -def decode_gzip(data): - gzipper = gzip.GzipFile(fileobj=BytesIO(data)) - return gzipper.read() +class DeflateDecoder(object): + + def __init__(self): + self._first_try = True + self._data = binary_type() + self._obj = zlib.decompressobj() + def __getattr__(self, name): + return getattr(self._obj, name) -def decode_deflate(data): - try: - return zlib.decompress(data) - except zlib.error: - return zlib.decompress(data, -zlib.MAX_WBITS) + def decompress(self, data): + if not self._first_try: + return self._obj.decompress(data) + + self._data += data + try: + return self._obj.decompress(data) + except zlib.error: + self._first_try = False + self._obj = zlib.decompressobj(-zlib.MAX_WBITS) + try: + return self.decompress(self._data) + finally: + self._data = None + + +def _get_decoder(mode): + if mode == 'gzip': + return zlib.decompressobj(16 + zlib.MAX_WBITS) + + return DeflateDecoder() class HTTPResponse(object): @@ -52,10 +71,7 @@ class HTTPResponse(object): otherwise unused. """ - CONTENT_DECODERS = { - 'gzip': decode_gzip, - 'deflate': decode_deflate, - } + CONTENT_DECODERS = ['gzip', 'deflate'] def __init__(self, body='', headers=None, status=0, version=0, reason=None, strict=0, preload_content=True, decode_content=True, @@ -65,8 +81,9 @@ class HTTPResponse(object): self.version = version self.reason = reason self.strict = strict + self.decode_content = decode_content - self._decode_content = decode_content + self._decoder = None self._body = body if body and isinstance(body, basestring) else None self._fp = None self._original_response = original_response @@ -115,13 +132,13 @@ class HTTPResponse(object): parameters: ``decode_content`` and ``cache_content``. :param amt: - How much of the content to read. If specified, decoding and caching - is skipped because we can't decode partial content nor does it make - sense to cache partial content as the full response. + How much of the content to read. 
+            because it doesn't make sense to cache partial content as the full
+            response.
 
         :param decode_content:
             If True, will attempt to decode the body based on the
-            'content-encoding' header. (Overridden if ``amt`` is set.)
+            'content-encoding' header.
 
         :param cache_content:
             If True, will save the returned data such that the same result is
@@ -130,28 +147,50 @@ class HTTPResponse(object):
             after having ``.read()`` the file object. (Overridden if ``amt`` is
             set.)
         """
-        content_encoding = self.headers.get('content-encoding')
-        decoder = self.CONTENT_DECODERS.get(content_encoding)
+        # Note: content-encoding value should be case-insensitive, per RFC 2616
+        # Section 3.5
+        content_encoding = self.headers.get('content-encoding', '').lower()
+        if self._decoder is None:
+            if content_encoding in self.CONTENT_DECODERS:
+                self._decoder = _get_decoder(content_encoding)
         if decode_content is None:
-            decode_content = self._decode_content
+            decode_content = self.decode_content
 
         if self._fp is None:
            return
 
+        flush_decoder = False
+
        try:
            if amt is None:
                # cStringIO doesn't like amt=None
                data = self._fp.read()
+                flush_decoder = True
            else:
-                return self._fp.read(amt)
+                cache_content = False
+                data = self._fp.read(amt)
+                if amt != 0 and not data:  # Platform-specific: Buggy versions of Python.
+                    # Close the connection when no data is returned
+                    #
+                    # This is redundant to what httplib/http.client _should_
+                    # already do. However, versions of python released before
+                    # December 15, 2012 (http://bugs.python.org/issue16298) do not
+                    # properly close the connection in all cases. There is no harm
+                    # in redundantly calling close.
+                    self._fp.close()
+                    flush_decoder = True
 
            try:
-                if decode_content and decoder:
-                    data = decoder(data)
+                if decode_content and self._decoder:
+                    data = self._decoder.decompress(data)
            except (IOError, zlib.error):
                raise DecodeError("Received response with content-encoding: %s, but "
                                  "failed to decode it." % content_encoding)
 
+            if flush_decoder and self._decoder:
+                buf = self._decoder.decompress(binary_type())
+                data += buf + self._decoder.flush()
+
            if cache_content:
                self._body = data
diff --git a/urllib3/util.py b/urllib3/util.py
index 8ec990b..544f9ed 100644
--- a/urllib3/util.py
+++ b/urllib3/util.py
@@ -1,5 +1,5 @@
 # urllib3/util.py
-# Copyright 2008-2012 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
+# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
 #
 # This module is part of urllib3 and is released under
 # the MIT License: http://www.opensource.org/licenses/mit-license.php
@@ -8,18 +8,32 @@
 from base64 import b64encode
 from collections import namedtuple
 from socket import error as SocketError
+from hashlib import md5, sha1
+from binascii import hexlify, unhexlify
 
 try:
     from select import poll, POLLIN
-except ImportError: # `poll` doesn't exist on OSX and other platforms
+except ImportError:  # `poll` doesn't exist on OSX and other platforms
     poll = False
     try:
         from select import select
-    except ImportError: # `select` doesn't exist on AppEngine.
+    except ImportError:  # `select` doesn't exist on AppEngine.
         select = False
 
+try:  # Test for SSL features
+    SSLContext = None
+    HAS_SNI = False
+
+    import ssl
+    from ssl import wrap_socket, CERT_NONE, PROTOCOL_SSLv23
+    from ssl import SSLContext  # Modern SSL?
+    from ssl import HAS_SNI  # Has SNI?
+except ImportError:
+    pass
+
+
 from .packages import six
-from .exceptions import LocationParseError
+from .exceptions import LocationParseError, SSLError
 
 
 class Url(namedtuple('Url', ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment'])):
@@ -92,9 +106,9 @@ def parse_url(url):
 
         >>> parse_url('http://google.com/mail/')
         Url(scheme='http', host='google.com', port=None, path='/', ...)
-        >>> prase_url('google.com:80')
+        >>> parse_url('google.com:80')
         Url(scheme=None, host='google.com', port=80, path=None, ...)
-        >>> prase_url('/foo?bar')
+        >>> parse_url('/foo?bar')
         Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...)
     """
@@ -220,7 +234,7 @@ def make_headers(keep_alive=None, accept_encoding=None, user_agent=None,
     return headers
 
 
-def is_connection_dropped(conn):
+def is_connection_dropped(conn):  # Platform-specific
     """
     Returns True if the connection is dropped and should be closed.
@@ -234,7 +248,7 @@
     if not sock: # Platform-specific: AppEngine
         return False
 
-    if not poll: # Platform-specific
+    if not poll:
         if not select: # Platform-specific: AppEngine
             return False
@@ -250,3 +264,115 @@
         if fno == sock.fileno():
             # Either data is buffered (bad), or the connection is dropped.
             return True
+
+
+def resolve_cert_reqs(candidate):
+    """
+    Resolves the argument to a numeric constant, which can be passed to
+    the wrap_socket function/method from the ssl module.
+    Defaults to :data:`ssl.CERT_NONE`.
+    If given a string it is assumed to be the name of the constant in the
+    :mod:`ssl` module or its abbreviation.
+    (So you can specify `REQUIRED` instead of `CERT_REQUIRED`.)
+    If it's neither `None` nor a string we assume it is already the numeric
+    constant which can directly be passed to wrap_socket.
+    """
+    if candidate is None:
+        return CERT_NONE
+
+    if isinstance(candidate, str):
+        res = getattr(ssl, candidate, None)
+        if res is None:
+            res = getattr(ssl, 'CERT_' + candidate)
+        return res
+
+    return candidate
+
+
+def resolve_ssl_version(candidate):
+    """
+    Like resolve_cert_reqs(), but resolves the argument to an
+    ``ssl.PROTOCOL_*`` constant; defaults to :data:`ssl.PROTOCOL_SSLv23`.
+    """
+    if candidate is None:
+        return PROTOCOL_SSLv23
+
+    if isinstance(candidate, str):
+        res = getattr(ssl, candidate, None)
+        if res is None:
+            res = getattr(ssl, 'PROTOCOL_' + candidate)
+        return res
+
+    return candidate
+
+
+def assert_fingerprint(cert, fingerprint):
+    """
+    Checks if given fingerprint matches the supplied certificate.
+
+    :param cert:
+        Certificate as bytes object.
+    :param fingerprint:
+        Fingerprint as string of hexdigits, can be interspersed with colons.
+    """
+
+    # Maps the length of a digest to a possible hash function producing
+    # this digest.
+    hashfunc_map = {
+        16: md5,
+        20: sha1
+    }
+
+    fingerprint = fingerprint.replace(':', '').lower()
+
+    digest_length, rest = divmod(len(fingerprint), 2)
+
+    if rest or digest_length not in hashfunc_map:
+        raise SSLError('Fingerprint is of invalid length.')
+
+    # We need encode() here for py32; works on py2 and py33.
+    fingerprint_bytes = unhexlify(fingerprint.encode())
+
+    hashfunc = hashfunc_map[digest_length]
+
+    cert_digest = hashfunc(cert).digest()
+
+    if not cert_digest == fingerprint_bytes:
+        raise SSLError('Fingerprints did not match. Expected "{0}", got "{1}".'
+                       .format(hexlify(fingerprint_bytes),
+                               hexlify(cert_digest)))
+
+
+if SSLContext is not None:  # Python 3.2+
+    def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None,
+                        ca_certs=None, server_hostname=None,
+                        ssl_version=None):
+        """
+        All arguments except `server_hostname` have the same meaning as for
+        :func:`ssl.wrap_socket`
+
+        :param server_hostname:
+            Hostname of the expected certificate
+        """
+        context = SSLContext(ssl_version)
+        context.verify_mode = cert_reqs
+        if ca_certs:
+            try:
+                context.load_verify_locations(ca_certs)
+            # Py32 raises IOError
+            # Py33 raises FileNotFoundError
+            except Exception as e:  # Reraise as SSLError
+                raise SSLError(e)
+        if certfile:
+            # FIXME: This block needs a test.
+            context.load_cert_chain(certfile, keyfile)
+        if HAS_SNI:  # Platform-specific: OpenSSL with enabled SNI
+            return context.wrap_socket(sock, server_hostname=server_hostname)
+        return context.wrap_socket(sock)
+
+else:  # Python 3.1 and earlier
+    def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None,
+                        ca_certs=None, server_hostname=None,
+                        ssl_version=None):
+        return wrap_socket(sock, keyfile=keyfile, certfile=certfile,
+                           ca_certs=ca_certs, cert_reqs=cert_reqs,
+                           ssl_version=ssl_version)
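The sketches below illustrate the behaviour added in this diff; they are not part of the patch, and every host name, port and path in them is made up. The ``ProxyManager`` change replaces the curl-style proxy headers with an automatic ``Host`` header derived from the request URL::

    import urllib3

    # hypothetical forward proxy listening on localhost:3128
    proxy = urllib3.proxy_from_url('http://localhost:3128/')

    # ``url`` must be absolute; a ``Host: example.com`` header is added
    # automatically unless one is supplied in ``headers``.
    r = proxy.urlopen('GET', 'http://example.com/')
    print(r.status)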
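The new ``RequestMethods.__init__(headers=...)`` default and the optional third element of a filetuple combine roughly as follows; the upload endpoint is hypothetical::

    import urllib3

    # Headers passed to the manager act as defaults; they are used whenever a
    # request does not pass its own ``headers`` argument.
    http = urllib3.PoolManager(headers={'User-Agent': 'urllib3-example/0.1'})

    r = http.request_encode_body(
        'POST', 'http://localhost:8080/upload',   # hypothetical endpoint
        fields={
            'note': 'plain form field',
            'fakefile': ('foofile.txt', 'contents of foofile'),
            'typedfile': ('bazfile.bin', 'contents of bazfile',
                          'application/octet-stream'),   # explicit MIME type
        })
    print(r.status)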
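With the one-shot ``decode_gzip``/``decode_deflate`` helpers replaced by stateful ``zlib`` decompress objects, a compressed body can now be decoded incrementally from ``read(amt)`` instead of only after a full read. A rough streaming sketch, again with a made-up URL::

    import urllib3

    http = urllib3.PoolManager()
    r = http.request('GET', 'http://localhost:8080/large.json',  # hypothetical
                     headers={'Accept-Encoding': 'gzip'},
                     preload_content=False)

    chunks = []
    while True:
        chunk = r.read(1024)   # decompressed incrementally; decoder flushed at EOF
        if not chunk:
            break
        chunks.append(chunk)
    body = b''.join(chunks)
    r.release_conn()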
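``resolve_cert_reqs()`` and ``resolve_ssl_version()`` are what let ``cert_reqs`` and ``ssl_version`` accept strings: ``None``, a full constant name, or just its suffix are mapped onto the corresponding ``ssl`` constant. For example::

    import ssl
    from urllib3.util import resolve_cert_reqs, resolve_ssl_version

    assert resolve_cert_reqs(None) == ssl.CERT_NONE
    assert resolve_cert_reqs('REQUIRED') == ssl.CERT_REQUIRED
    assert resolve_cert_reqs('CERT_REQUIRED') == ssl.CERT_REQUIRED
    assert resolve_ssl_version('SSLv23') == ssl.PROTOCOL_SSLv23

The same strings can be handed to ``HTTPSConnectionPool`` through its ``cert_reqs`` and ``ssl_version`` parameters.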
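``assert_fingerprint()`` infers the hash from the digest length (16 bytes for MD5, 20 for SHA-1), ignores colons, and raises ``SSLError`` on any mismatch. A self-contained sketch using a dummy certificate byte string::

    from hashlib import sha1
    from urllib3.exceptions import SSLError
    from urllib3.util import assert_fingerprint

    cert = b'dummy DER bytes'      # real code would use sock.getpeercert(binary_form=True)
    good = sha1(cert).hexdigest()  # 40 hex digits, so it is checked as SHA-1

    assert_fingerprint(cert, good)                                               # passes silently
    assert_fingerprint(cert, ':'.join(good[i:i + 2] for i in range(0, 40, 2)))   # colons are ignored

    try:
        assert_fingerprint(cert, '00' * 20)   # valid length, wrong digest
    except SSLError as exc:
        print(exc)   # "Fingerprints did not match. ..."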