40 files changed, 1657 insertions, 237 deletions
diff --git a/CHANGES.rst b/CHANGES.rst index 552d9b7..8d922a4 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,69 @@ Changes ======= +1.10.4 (2015-05-03) ++++++++++++++++++++ + +* Migrate tests to Tornado 4. (Issue #594) + +* Append default warning configuration rather than overwrite. + (Issue #603) + +* Fix streaming decoding regression. (Issue #595) + +* Fix chunked requests losing state across keep-alive connections. + (Issue #599) + +* Fix hanging when chunked HEAD response has no body. (Issue #605) + + +1.10.3 (2015-04-21) ++++++++++++++++++++ + +* Emit ``InsecurePlatformWarning`` when SSLContext object is missing. + (Issue #558) + +* Fix regression of duplicate header keys being discarded. + (Issue #563) + +* ``Response.stream()`` returns a generator for chunked responses. + (Issue #560) + +* Set upper-bound timeout when waiting for a socket in PyOpenSSL. + (Issue #585) + +* Work on platforms without `ssl` module for plain HTTP requests. + (Issue #587) + +* Stop relying on the stdlib's default cipher list. (Issue #588) + + +1.10.2 (2015-02-25) ++++++++++++++++++++ + +* Fix file descriptor leakage on retries. (Issue #548) + +* Removed RC4 from default cipher list. (Issue #551) + +* Header performance improvements. (Issue #544) + +* Fix PoolManager not obeying redirect retry settings. (Issue #553) + + +1.10.1 (2015-02-10) ++++++++++++++++++++ + +* Pools can be used as context managers. (Issue #545) + +* Don't re-use connections which experienced an SSLError. (Issue #529) + +* Don't fail when gzip decoding an empty stream. (Issue #535) + +* Add sha256 support for fingerprint verification. (Issue #540) + +* Fixed handling of header values containing commas. (Issue #533) + + 1.10 (2014-12-14) +++++++++++++++++ @@ -42,7 +105,7 @@ Changes * Fixed packaging issues of some development-related files not getting included. (Issue #440) - + * Allow performing *only* fingerprint verification. (Issue #444) * Emit ``SecurityWarning`` if system clock is waaay off. (Issue #445) diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt index ecaf9bb..5807307 100644 --- a/CONTRIBUTORS.txt +++ b/CONTRIBUTORS.txt @@ -133,5 +133,21 @@ In chronological order: * Evgeny Kapun <abacabadabacaba@gmail.com> * Bugfixes +* Benjamen Meyer <bm_witness@yahoo.com> + * Security Warning Documentation update for proper capture + +* Shivan Sornarajah <github@sornars.com> + * Support for using ConnectionPool and PoolManager as context managers. + +* Alex Gaynor <alex.gaynor@gmail.com> + * Updates to the default SSL configuration + +* Tomas Tomecek <ttomecek@redhat.com> + * Implemented generator for getting chunks from chunked responses. + +* tlynn <https://github.com/tlynn> + * Respect the warning preferences at import. + * [Your name or handle] <[email or website]> * [Brief summary of your changes] + @@ -31,6 +31,7 @@ clean: find . -name "__pycache__" -delete rm -f $(REQUIREMENTS_OUT) rm -rf docs/_build + rm -rf build/ test: requirements nosetests @@ -1,6 +1,6 @@ Metadata-Version: 1.1 Name: urllib3 -Version: 1.10 +Version: 1.10.4 Summary: HTTP library with thread-safe connection pooling, file post, and more. Home-page: http://urllib3.readthedocs.org/ Author: Andrey Petrov @@ -156,6 +156,69 @@ Description: ======= Changes ======= + 1.10.4 (2015-05-03) + +++++++++++++++++++ + + * Migrate tests to Tornado 4. (Issue #594) + + * Append default warning configuration rather than overwrite. + (Issue #603) + + * Fix streaming decoding regression. (Issue #595) + + * Fix chunked requests losing state across keep-alive connections. 
+ (Issue #599) + + * Fix hanging when chunked HEAD response has no body. (Issue #605) + + + 1.10.3 (2015-04-21) + +++++++++++++++++++ + + * Emit ``InsecurePlatformWarning`` when SSLContext object is missing. + (Issue #558) + + * Fix regression of duplicate header keys being discarded. + (Issue #563) + + * ``Response.stream()`` returns a generator for chunked responses. + (Issue #560) + + * Set upper-bound timeout when waiting for a socket in PyOpenSSL. + (Issue #585) + + * Work on platforms without `ssl` module for plain HTTP requests. + (Issue #587) + + * Stop relying on the stdlib's default cipher list. (Issue #588) + + + 1.10.2 (2015-02-25) + +++++++++++++++++++ + + * Fix file descriptor leakage on retries. (Issue #548) + + * Removed RC4 from default cipher list. (Issue #551) + + * Header performance improvements. (Issue #544) + + * Fix PoolManager not obeying redirect retry settings. (Issue #553) + + + 1.10.1 (2015-02-10) + +++++++++++++++++++ + + * Pools can be used as context managers. (Issue #545) + + * Don't re-use connections which experienced an SSLError. (Issue #529) + + * Don't fail when gzip decoding an empty stream. (Issue #535) + + * Add sha256 support for fingerprint verification. (Issue #540) + + * Fixed handling of header values containing commas. (Issue #533) + + 1.10 (2014-12-14) +++++++++++++++++ @@ -197,7 +260,7 @@ Description: ======= * Fixed packaging issues of some development-related files not getting included. (Issue #440) - + * Allow performing *only* fingerprint verification. (Issue #444) * Emit ``SecurityWarning`` if system clock is waaay off. (Issue #445) diff --git a/debian/changelog b/debian/changelog index 06d041a..5d241dc 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,13 @@ +python-urllib3 (1.10.4-1) unstable; urgency=medium + + * New upstream release. + * debian/watch + - Use pypi.debian.net redirector. + * debian/patches/01_do-not-use-embedded-python-six.patch + - Refresh. + + -- Daniele Tricoli <eriol@mornie.org> Sun, 03 May 2015 17:18:55 +0200 + python-urllib3 (1.10-1) experimental; urgency=medium * New upstream release. diff --git a/debian/patches/01_do-not-use-embedded-python-six.patch b/debian/patches/01_do-not-use-embedded-python-six.patch index 62a5a51..2ad8061 100644 --- a/debian/patches/01_do-not-use-embedded-python-six.patch +++ b/debian/patches/01_do-not-use-embedded-python-six.patch @@ -1,7 +1,7 @@ Description: Do not use embedded copy of python-six. 
Author: Daniele Tricoli <eriol@mornie.org> Forwarded: not-needed -Last-Update: 2014-12-31 +Last-Update: 2015-05-03 --- a/test/test_collections.py +++ b/test/test_collections.py @@ -13,7 +13,7 @@ Last-Update: 2014-12-31 +import six xrange = six.moves.xrange - + from nose.plugins.skip import SkipTest --- a/urllib3/connectionpool.py +++ b/urllib3/connectionpool.py @@ -27,7 +27,7 @@ @@ -40,12 +40,12 @@ Last-Update: 2014-12-31 writer = codecs.lookup('utf-8')[3] --- a/urllib3/response.py +++ b/urllib3/response.py -@@ -4,7 +4,7 @@ - - from ._collections import HTTPHeaderDict - from .exceptions import ProtocolError, DecodeError, ReadTimeoutError --from .packages.six import string_types as basestring, binary_type -+from six import string_types as basestring, binary_type +@@ -10,7 +10,7 @@ + from .exceptions import ( + ProtocolError, DecodeError, ReadTimeoutError, ResponseNotChunked + ) +-from .packages.six import string_types as basestring, binary_type, PY3 ++from six import string_types as basestring, binary_type, PY3 from .connection import HTTPException, BaseSSLError from .util.response import is_fp_closed @@ -99,8 +99,8 @@ Last-Update: 2014-12-31 from collections import OrderedDict except ImportError: from .packages.ordered_dict import OrderedDict --from .packages.six import iterkeys, itervalues -+from six import iterkeys, itervalues --from .packages.six import iterkeys, itervalues, PY3 ++from six import iterkeys, itervalues, PY3 __all__ = ['RecentlyUsedContainer', 'HTTPHeaderDict'] diff --git a/debian/watch b/debian/watch index c404fa4..d58f4c1 100644 --- a/debian/watch +++ b/debian/watch @@ -1,2 +1,3 @@ version=3 -https://pypi.python.org/packages/source/u/urllib3/urllib3-(.*)\.tar\.gz +opts=uversionmangle=s/(rc|a|b|c)/~$1/ \ +http://pypi.debian.net/urllib3/urllib3-(.+)\.(?:zip|tgz|tbz|txz|(?:tar\.(?:gz|bz2|xz))) diff --git a/dev-requirements.txt b/dev-requirements.txt index 2eb5875..9ea3691 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -4,6 +4,4 @@ coverage==3.7.1 tox==1.7.1 twine==1.3.1 wheel==0.24.0 - -# Tornado 3.2.2 makes our tests flaky, so we stick with 3.1 -tornado==3.1.1 +tornado==4.1 diff --git a/docs/index.rst b/docs/index.rst index 1fc8a9c..81ac2d8 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -11,6 +11,7 @@ urllib3 Documentation helpers collections contrib + security Highlights @@ -257,6 +258,35 @@ disabled individually. See the :class:`~urllib3.util.retry.Retry` definition for more details. +Stream +------ + +You may also stream your response and get data as it arrives (e.g. when using +``transfer-encoding: chunked``). In this case, the +:func:`~urllib3.response.HTTPResponse.stream` method will return a generator. + +:: + + >>> from urllib3 import PoolManager + >>> http = PoolManager() + + >>> r = http.request("GET", "http://httpbin.org/stream/3") + >>> r.getheader("transfer-encoding") + 'chunked' + + >>> for chunk in r.stream(): + ... print chunk + {"url": "http://httpbin.org/stream/3", ..., "id": 0, ...} + {"url": "http://httpbin.org/stream/3", ..., "id": 1, ...} + {"url": "http://httpbin.org/stream/3", ..., "id": 2, ...} + >>> r.closed + True + +Completely consuming the stream will auto-close the response and release +the connection back to the pool. If you're only partially consuming the +stream, make sure to manually call ``r.close()`` on the response. +
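A minimal sketch of the streaming pattern documented above, written against the urllib3 API described in this release; the httpbin.org endpoint is illustrative and assumes network access::

    import urllib3

    http = urllib3.PoolManager()
    resp = http.request('GET', 'http://httpbin.org/stream/3',
                        preload_content=False)

    try:
        # stream() yields data as it arrives; for a chunked response it
        # yields one item per chunk (see Issue #560 above).
        for chunk in resp.stream():
            print(chunk)
    finally:
        # A no-op if the stream was fully consumed and auto-closed, but
        # required when the stream is only partially consumed.
        resp.close()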
Foundation ---------- diff --git a/docs/managers.rst b/docs/managers.rst index f9cab03..6c841b7 100644 --- a/docs/managers.rst +++ b/docs/managers.rst @@ -36,9 +36,23 @@ pools. That is, if you set the PoolManager ``num_pools`` to 10, then after making requests to 11 or more different hosts, the least recently used pools will be cleaned up eventually. -Cleanup of stale pools does not happen immediately. You can read more about the -implementation and the various adjustable variables within -:class:`~urllib3._collections.RecentlyUsedContainer`. +Cleanup of stale pools does not happen immediately but can be forced when used +as a context manager. + +.. doctest :: + + >>> from urllib3 import PoolManager + >>> with PoolManager(10) as manager: + ... r = manager.request('GET', 'http://example.com') + ... r = manager.request('GET', 'http://httpbin.org/') + ... len(manager.pools) + ... + 2 + >>> len(manager.pools) + 0 + +You can read more about the implementation and the various adjustable variables +within :class:`~urllib3._collections.RecentlyUsedContainer`. API --- diff --git a/docs/pools.rst b/docs/pools.rst index 63cb7d1..9cc2be9 100644 --- a/docs/pools.rst +++ b/docs/pools.rst @@ -33,7 +33,22 @@ If you need to make requests to the same host repeatedly, then you should use a By default, the pool will cache just one connection. If you're planning on using such a pool in a multithreaded environment, you should set the ``maxsize`` of the pool to a higher number, such as the number of threads. You can also control -many other variables like timeout, blocking, and default headers. +many other variables like timeout, blocking, and default headers. + +A ConnectionPool can be used as a context manager to automatically clear the +pool after usage. + +.. doctest :: + + >>> from urllib3 import HTTPConnectionPool + >>> with HTTPConnectionPool('ajax.googleapis.com', maxsize=1) as pool: + ... r = pool.request('GET', '/ajax/services/search/web', + ... fields={'q': 'urllib3', 'v': '1.0'}) + ... print(pool.pool) + ... + <queue.LifoQueue object at 0x7f67367dfcf8> + >>> print(pool.pool) + None Helpers ------- diff --git a/docs/security.rst b/docs/security.rst index 0566737..881730e 100644 --- a/docs/security.rst +++ b/docs/security.rst @@ -111,6 +111,8 @@ Once you find your root certificate file:: ... +.. _pyopenssl: + OpenSSL / PyOpenSSL ------------------- @@ -137,12 +139,14 @@ Now you can continue using urllib3 as you normally would. For more details, check the :mod:`~urllib3.contrib.pyopenssl` module. +.. _insecurerequestwarning: + InsecureRequestWarning ---------------------- .. versionadded:: 1.9 -Unverified HTTPS requests will trigger a warning:: +Unverified HTTPS requests will trigger a warning via Python's ``warnings`` module:: urllib3/connectionpool.py:736: InsecureRequestWarning: Unverified HTTPS request is being made. Adding certificate verification is strongly advised. @@ -158,3 +162,30 @@ you can use :func:`~urllib3.disable_warnings`:: urllib3.disable_warnings() Making unverified HTTPS requests is strongly discouraged. ˙ ͜ʟ˙ + +Alternatively, if you are using Python's ``logging`` module, you can capture the +warnings to your own log:: + + logging.captureWarnings(True) + +Capturing the warnings to your own log is much preferred over simply disabling +the warnings. +
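A short illustration of the ``logging.captureWarnings`` approach recommended above, using only the standard library; the handler and log file name are assumptions for the sketch, not urllib3 requirements::

    import logging

    # Route warnings.warn() output through the logging system instead of stderr.
    logging.captureWarnings(True)

    # The stdlib emits captured warnings via the 'py.warnings' logger.
    warnings_logger = logging.getLogger('py.warnings')
    warnings_logger.addHandler(logging.FileHandler('https-warnings.log'))
    warnings_logger.setLevel(logging.WARNING)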
+InsecurePlatformWarning +----------------------- + +.. versionadded:: 1.11 + +Certain Python platforms (specifically, versions of Python earlier than 2.7.9) +have restrictions in their ``ssl`` module that limit the configuration that +``urllib3`` can apply. In particular, this can cause HTTPS requests that would +succeed on more featureful platforms to fail, and can cause certain security +features to be unavailable. + +If you encounter this warning, it is strongly recommended you upgrade to a +newer Python version, or that you use pyOpenSSL as described in the +:ref:`pyopenssl` section. + +If you know what you are doing and would like to disable this and other +warnings, please consult the :ref:`insecurerequestwarning` section for +instructions on how to handle the warnings. diff --git a/dummyserver/handlers.py b/dummyserver/handlers.py index 72faa1a..53fbe4a 100644 --- a/dummyserver/handlers.py +++ b/dummyserver/handlers.py @@ -9,7 +9,7 @@ import time import zlib from io import BytesIO -from tornado.wsgi import HTTPRequest +from tornado.web import RequestHandler try: from urllib.parse import urlsplit @@ -21,23 +21,34 @@ log = logging.getLogger(__name__) class Response(object): def __init__(self, body='', status='200 OK', headers=None): - if not isinstance(body, bytes): - body = body.encode('utf8') - self.body = body self.status = status self.headers = headers or [("Content-type", "text/plain")] - def __call__(self, environ, start_response): - start_response(self.status, self.headers) - return [self.body] + def __call__(self, request_handler): + status, reason = self.status.split(' ', 1) + request_handler.set_status(int(status), reason) + for header,value in self.headers: + request_handler.add_header(header,value) + + # chunked + if isinstance(self.body, list): + for item in self.body: + if not isinstance(item, bytes): + item = item.encode('utf8') + request_handler.write(item) + request_handler.flush() + else: + body = self.body + if not isinstance(body, bytes): + body = body.encode('utf8') + request_handler.write(body) -class WSGIHandler(object): - pass +RETRY_TEST_NAMES = collections.defaultdict(int) -class TestingApp(WSGIHandler): +class TestingApp(RequestHandler): """ Simple app that performs various operations, useful for testing an HTTP library. @@ -46,10 +57,25 @@ class TestingApp(WSGIHandler): it exists. Status code 200 indicates success, 400 indicates failure. Each method has its own conditions for success/failure. """ - def __call__(self, environ, start_response): - """ Call the correct method in this class based on the incoming URI """ - req = HTTPRequest(environ) + def get(self): + """ Handle GET requests """ + self._call_method() + + def post(self): + """ Handle POST requests """ + self._call_method() + def put(self): + """ Handle PUT requests """ + self._call_method() + + def options(self): + """ Handle OPTIONS requests """ + self._call_method() + + def _call_method(self): + """ Call the correct method in this class based on the incoming URI """ + req = self.request req.params = {} for k, v in req.arguments.items(): req.params[k] = next(iter(v)) @@ -60,13 +86,14 @@ class TestingApp(WSGIHandler): target = path[1:].replace('/', '_') method = getattr(self, target, self.index) + resp = method(req) if dict(resp.headers).get('Connection') == 'close': # FIXME: Can we kill the connection somehow?
pass - return resp(environ, start_response) + resp(self) def index(self, _request): "Render simple message" @@ -184,15 +211,27 @@ class TestingApp(WSGIHandler): return Response("test-name header not set", status="400 Bad Request") - if not hasattr(self, 'retry_test_names'): - self.retry_test_names = collections.defaultdict(int) - self.retry_test_names[test_name] += 1 + RETRY_TEST_NAMES[test_name] += 1 - if self.retry_test_names[test_name] >= 2: + if RETRY_TEST_NAMES[test_name] >= 2: return Response("Retry successful!") else: return Response("need to keep retrying!", status="418 I'm A Teapot") + def chunked(self, request): + return Response(['123'] * 4) + + def chunked_gzip(self, request): + chunks = [] + compressor = zlib.compressobj(6, zlib.DEFLATED, 16 + zlib.MAX_WBITS) + + for uncompressed in [b'123'] * 4: + chunks.append(compressor.compress(uncompressed)) + + chunks.append(compressor.flush()) + + return Response(chunks, headers=[('Content-Encoding', 'gzip')]) + def shutdown(self, request): sys.exit() diff --git a/dummyserver/server.py b/dummyserver/server.py index 6ee9a5d..63124d3 100755 --- a/dummyserver/server.py +++ b/dummyserver/server.py @@ -13,9 +13,11 @@ import string import sys import threading import socket +import warnings + +from urllib3.exceptions import HTTPWarning from tornado.platform.auto import set_close_exec -import tornado.wsgi import tornado.httpserver import tornado.ioloop import tornado.web @@ -40,6 +42,11 @@ NO_SAN_CA = os.path.join(CERTS_PATH, 'cacert.no_san.pem') # Different types of servers we have: +class NoIPv6Warning(HTTPWarning): + "IPv6 is not available" + pass + + class SocketServerThread(threading.Thread): """ :param socket_handler: Callable which receives a socket argument for one @@ -50,13 +57,19 @@ class SocketServerThread(threading.Thread): def __init__(self, socket_handler, host='localhost', port=8081, ready_event=None): threading.Thread.__init__(self) + self.daemon = True self.socket_handler = socket_handler self.host = host self.ready_event = ready_event def _start_server(self): - sock = socket.socket(socket.AF_INET6) + if socket.has_ipv6: + sock = socket.socket(socket.AF_INET6) + else: + warnings.warn("No IPv6 support. 
Falling back to IPv4.", + NoIPv6Warning) + sock = socket.socket(socket.AF_INET) if sys.platform != 'win32': sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) sock.bind((self.host, 0)) @@ -192,7 +205,7 @@ if __name__ == '__main__': host = '127.0.0.1' io_loop = tornado.ioloop.IOLoop() - app = tornado.wsgi.WSGIContainer(TestingApp()) + app = tornado.web.Application([(r".*", TestingApp)]) server, port = run_tornado_app(app, io_loop, None, 'http', host) server_thread = run_loop_in_thread(io_loop) diff --git a/dummyserver/testcase.py b/dummyserver/testcase.py index 335b2f2..67e62cf 100644 --- a/dummyserver/testcase.py +++ b/dummyserver/testcase.py @@ -2,7 +2,7 @@ import unittest import socket import threading from nose.plugins.skip import SkipTest -from tornado import ioloop, web, wsgi +from tornado import ioloop, web from dummyserver.server import ( SocketServerThread, @@ -30,7 +30,9 @@ class SocketDummyServerTestCase(unittest.TestCase): ready_event=ready_event, host=cls.host) cls.server_thread.start() - ready_event.wait() + ready_event.wait(5) + if not ready_event.is_set(): + raise Exception("most likely failed to start server") cls.port = cls.server_thread.port @classmethod @@ -55,7 +57,7 @@ class HTTPDummyServerTestCase(unittest.TestCase): @classmethod def _start_server(cls): cls.io_loop = ioloop.IOLoop() - app = wsgi.WSGIContainer(TestingApp()) + app = web.Application([(r".*", TestingApp)]) cls.server, cls.port = run_tornado_app(app, cls.io_loop, cls.certs, cls.scheme, cls.host) cls.server_thread = run_loop_in_thread(cls.io_loop) @@ -97,11 +99,11 @@ class HTTPDummyProxyTestCase(unittest.TestCase): def setUpClass(cls): cls.io_loop = ioloop.IOLoop() - app = wsgi.WSGIContainer(TestingApp()) + app = web.Application([(r'.*', TestingApp)]) cls.http_server, cls.http_port = run_tornado_app( app, cls.io_loop, None, 'http', cls.http_host) - app = wsgi.WSGIContainer(TestingApp()) + app = web.Application([(r'.*', TestingApp)]) cls.https_server, cls.https_port = run_tornado_app( app, cls.io_loop, cls.https_certs, 'https', cls.http_host) diff --git a/test/__init__.py b/test/__init__.py index d56a4d3..2fce71c 100644 --- a/test/__init__.py +++ b/test/__init__.py @@ -13,7 +13,8 @@ from urllib3.packages import six # Reset. SO suggests this hostname TARPIT_HOST = '10.255.255.1' -VALID_SOURCE_ADDRESSES = [('::1', 0), ('127.0.0.1', 0)] +# (Arguments for socket, is it IPv6 address?) +VALID_SOURCE_ADDRESSES = [(('::1', 0), True), (('127.0.0.1', 0), False)] # RFC 5737: 192.0.2.0/24 is for testing only. # RFC 3849: 2001:db8::/32 is for documentation only. 
INVALID_SOURCE_ADDRESSES = [('192.0.2.255', 0), ('2001:db8::1', 0)] diff --git a/test/test_collections.py b/test/test_collections.py index 4d173ac..0b36512 100644 --- a/test/test_collections.py +++ b/test/test_collections.py @@ -7,6 +7,8 @@ from urllib3._collections import ( from urllib3.packages import six xrange = six.moves.xrange +from nose.plugins.skip import SkipTest + class TestLRUContainer(unittest.TestCase): def test_maxsize(self): @@ -125,56 +127,216 @@ class TestLRUContainer(unittest.TestCase): self.assertRaises(NotImplementedError, d.__iter__) -class TestHTTPHeaderDict(unittest.TestCase): - def setUp(self): - self.d = HTTPHeaderDict(A='foo') - self.d.add('a', 'bar') +class NonMappingHeaderContainer(object): + def __init__(self, **kwargs): + self._data = {} + self._data.update(kwargs) - def test_overwriting_with_setitem_replaces(self): - d = HTTPHeaderDict() + def keys(self): + return self._data.keys() - d['A'] = 'foo' - self.assertEqual(d['a'], 'foo') + def __getitem__(self, key): + return self._data[key] - d['a'] = 'bar' - self.assertEqual(d['A'], 'bar') + +class TestHTTPHeaderDict(unittest.TestCase): + def setUp(self): + self.d = HTTPHeaderDict(Cookie='foo') + self.d.add('cookie', 'bar') + + def test_create_from_kwargs(self): + h = HTTPHeaderDict(ab=1, cd=2, ef=3, gh=4) + self.assertEqual(len(h), 4) + self.assertTrue('ab' in h) + + def test_create_from_dict(self): + h = HTTPHeaderDict(dict(ab=1, cd=2, ef=3, gh=4)) + self.assertEqual(len(h), 4) + self.assertTrue('ab' in h) + + def test_create_from_iterator(self): + teststr = 'urllib3ontherocks' + h = HTTPHeaderDict((c, c*5) for c in teststr) + self.assertEqual(len(h), len(set(teststr))) + + def test_create_from_list(self): + h = HTTPHeaderDict([('ab', 'A'), ('cd', 'B'), ('cookie', 'C'), ('cookie', 'D'), ('cookie', 'E')]) + self.assertEqual(len(h), 3) + self.assertTrue('ab' in h) + clist = h.getlist('cookie') + self.assertEqual(len(clist), 3) + self.assertEqual(clist[0], 'C') + self.assertEqual(clist[-1], 'E') + + def test_create_from_headerdict(self): + org = HTTPHeaderDict([('ab', 'A'), ('cd', 'B'), ('cookie', 'C'), ('cookie', 'D'), ('cookie', 'E')]) + h = HTTPHeaderDict(org) + self.assertEqual(len(h), 3) + self.assertTrue('ab' in h) + clist = h.getlist('cookie') + self.assertEqual(len(clist), 3) + self.assertEqual(clist[0], 'C') + self.assertEqual(clist[-1], 'E') + self.assertFalse(h is org) + self.assertEqual(h, org) + + def test_setitem(self): + self.d['Cookie'] = 'foo' + self.assertEqual(self.d['cookie'], 'foo') + self.d['cookie'] = 'with, comma' + self.assertEqual(self.d.getlist('cookie'), ['with, comma']) + + def test_update(self): + self.d.update(dict(Cookie='foo')) + self.assertEqual(self.d['cookie'], 'foo') + self.d.update(dict(cookie='with, comma')) + self.assertEqual(self.d.getlist('cookie'), ['with, comma']) + + def test_delitem(self): + del self.d['cookie'] + self.assertFalse('cookie' in self.d) + self.assertFalse('COOKIE' in self.d) + + def test_add_well_known_multiheader(self): + self.d.add('COOKIE', 'asdf') + self.assertEqual(self.d.getlist('cookie'), ['foo', 'bar', 'asdf']) + self.assertEqual(self.d['cookie'], 'foo, bar, asdf') + + def test_add_comma_separated_multiheader(self): + self.d.add('bar', 'foo') + self.d.add('BAR', 'bar') + self.d.add('Bar', 'asdf') + self.assertEqual(self.d.getlist('bar'), ['foo', 'bar', 'asdf']) + self.assertEqual(self.d['bar'], 'foo, bar, asdf') + + def test_extend_from_list(self): + self.d.extend([('set-cookie', '100'), ('set-cookie', '200'), ('set-cookie', '300')]) + 
self.assertEqual(self.d['set-cookie'], '100, 200, 300') + + def test_extend_from_dict(self): + self.d.extend(dict(cookie='asdf'), b='100') + self.assertEqual(self.d['cookie'], 'foo, bar, asdf') + self.assertEqual(self.d['b'], '100') + self.d.add('cookie', 'with, comma') + self.assertEqual(self.d.getlist('cookie'), ['foo', 'bar', 'asdf', 'with, comma']) + + def test_extend_from_container(self): + h = NonMappingHeaderContainer(Cookie='foo', e='foofoo') + self.d.extend(h) + self.assertEqual(self.d['cookie'], 'foo, bar, foo') + self.assertEqual(self.d['e'], 'foofoo') + self.assertEqual(len(self.d), 2) + + def test_extend_from_headerdict(self): + h = HTTPHeaderDict(Cookie='foo', e='foofoo') + self.d.extend(h) + self.assertEqual(self.d['cookie'], 'foo, bar, foo') + self.assertEqual(self.d['e'], 'foofoo') + self.assertEqual(len(self.d), 2) def test_copy(self): h = self.d.copy() self.assertTrue(self.d is not h) - self.assertEqual(self.d, h) - - def test_add(self): - d = HTTPHeaderDict() - - d['A'] = 'foo' - d.add('a', 'bar') - - self.assertEqual(d['a'], 'foo, bar') - self.assertEqual(d['A'], 'foo, bar') + self.assertEqual(self.d, h) def test_getlist(self): - self.assertEqual(self.d.getlist('a'), ['foo', 'bar']) - self.assertEqual(self.d.getlist('A'), ['foo', 'bar']) + self.assertEqual(self.d.getlist('cookie'), ['foo', 'bar']) + self.assertEqual(self.d.getlist('Cookie'), ['foo', 'bar']) self.assertEqual(self.d.getlist('b'), []) + self.d.add('b', 'asdf') + self.assertEqual(self.d.getlist('b'), ['asdf']) - def test_delitem(self): - del self.d['a'] - self.assertFalse('a' in self.d) - self.assertFalse('A' in self.d) + def test_getlist_after_copy(self): + self.assertEqual(self.d.getlist('cookie'), HTTPHeaderDict(self.d).getlist('cookie')) def test_equal(self): - b = HTTPHeaderDict({'a': 'foo, bar'}) + b = HTTPHeaderDict(cookie='foo, bar') + c = NonMappingHeaderContainer(cookie='foo, bar') self.assertEqual(self.d, b) - c = [('a', 'foo, bar')] - self.assertNotEqual(self.d, c) + self.assertEqual(self.d, c) + self.assertNotEqual(self.d, 2) + + def test_not_equal(self): + b = HTTPHeaderDict(cookie='foo, bar') + c = NonMappingHeaderContainer(cookie='foo, bar') + self.assertFalse(self.d != b) + self.assertFalse(self.d != c) + self.assertNotEqual(self.d, 2) + + def test_pop(self): + key = 'Cookie' + a = self.d[key] + b = self.d.pop(key) + self.assertEqual(a, b) + self.assertFalse(key in self.d) + self.assertRaises(KeyError, self.d.pop, key) + dummy = object() + self.assertTrue(dummy is self.d.pop(key, dummy)) + + def test_discard(self): + self.d.discard('cookie') + self.assertFalse('cookie' in self.d) + self.d.discard('cookie') def test_len(self): self.assertEqual(len(self.d), 1) + self.d.add('cookie', 'bla') + self.d.add('asdf', 'foo') + # len determined by unique fieldnames + self.assertEqual(len(self.d), 2) def test_repr(self): - rep = "HTTPHeaderDict({'A': 'foo, bar'})" + rep = "HTTPHeaderDict({'Cookie': 'foo, bar'})" self.assertEqual(repr(self.d), rep) + def test_items(self): + items = self.d.items() + self.assertEqual(len(items), 2) + self.assertEqual(items[0][0], 'Cookie') + self.assertEqual(items[0][1], 'foo') + self.assertEqual(items[1][0], 'Cookie') + self.assertEqual(items[1][1], 'bar') + + def test_dict_conversion(self): + # Also tested in connectionpool, needs to preserve case + hdict = {'Content-Length': '0', 'Content-type': 'text/plain', 'Server': 'TornadoServer/1.2.3'} + h = dict(HTTPHeaderDict(hdict).items()) + self.assertEqual(hdict, h) + + def test_string_enforcement(self): + # This currently 
throws AttributeError on key.lower(), should probably be something nicer + self.assertRaises(Exception, self.d.__setitem__, 3, 5) + self.assertRaises(Exception, self.d.add, 3, 4) + self.assertRaises(Exception, self.d.__delitem__, 3) + self.assertRaises(Exception, HTTPHeaderDict, {3: 3}) + + def test_from_httplib_py2(self): + if six.PY3: + raise SkipTest("python3 has a different internal header implementation") + msg = """ +Server: nginx +Content-Type: text/html; charset=windows-1251 +Connection: keep-alive +X-Some-Multiline: asdf + asdf + asdf +Set-Cookie: bb_lastvisit=1348253375; expires=Sat, 21-Sep-2013 18:49:35 GMT; path=/ +Set-Cookie: bb_lastactivity=0; expires=Sat, 21-Sep-2013 18:49:35 GMT; path=/ +www-authenticate: asdf +www-authenticate: bla + +""" + buffer = six.moves.StringIO(msg.lstrip().replace('\n', '\r\n')) + msg = six.moves.http_client.HTTPMessage(buffer) + d = HTTPHeaderDict.from_httplib(msg) + self.assertEqual(d['server'], 'nginx') + cookies = d.getlist('set-cookie') + self.assertEqual(len(cookies), 2) + self.assertTrue(cookies[0].startswith("bb_lastvisit")) + self.assertTrue(cookies[1].startswith("bb_lastactivity")) + self.assertEqual(d['x-some-multiline'].split(), ['asdf', 'asdf', 'asdf']) + self.assertEqual(d['www-authenticate'], 'asdf, bla') + self.assertEqual(d.getlist('www-authenticate'), ['asdf', 'bla']) + if __name__ == '__main__': unittest.main() diff --git a/test/test_connectionpool.py b/test/test_connectionpool.py index a6dbcf4..0718b0f 100644 --- a/test/test_connectionpool.py +++ b/test/test_connectionpool.py @@ -205,6 +205,26 @@ class TestConnectionPool(unittest.TestCase): def test_no_host(self): self.assertRaises(LocationValueError, HTTPConnectionPool, None) + def test_contextmanager(self): + with connection_from_url('http://google.com:80') as pool: + # Populate with some connections + conn1 = pool._get_conn() + conn2 = pool._get_conn() + conn3 = pool._get_conn() + pool._put_conn(conn1) + pool._put_conn(conn2) + + old_pool_queue = pool.pool + + self.assertEqual(pool.pool, None) + + self.assertRaises(ClosedPoolError, pool._get_conn) + + pool._put_conn(conn3) + + self.assertRaises(ClosedPoolError, pool._get_conn) + + self.assertRaises(Empty, old_pool_queue.get, block=False) if __name__ == '__main__': diff --git a/test/test_no_ssl.py b/test/test_no_ssl.py new file mode 100644 index 0000000..b5961b8 --- /dev/null +++ b/test/test_no_ssl.py @@ -0,0 +1,89 @@ +""" +Test what happens if Python was built without SSL + +* Everything that does not involve HTTPS should still work +* HTTPS requests must fail with an error that points at the ssl module +""" + +import sys +import unittest + + +class ImportBlocker(object): + """ + Block Imports + + To be placed on ``sys.meta_path``. This ensures that the modules + specified cannot be imported, even if they are a builtin. 
+ """ + def __init__(self, *namestoblock): + self.namestoblock = namestoblock + + def find_module(self, fullname, path=None): + if fullname in self.namestoblock: + return self + return None + + def load_module(self, fullname): + raise ImportError('import of {0} is blocked'.format(fullname)) + + +class ModuleStash(object): + """ + Stashes away previously imported modules + + If we reimport a module the data from coverage is lost, so we reuse the old + modules + """ + + def __init__(self, namespace, modules=sys.modules): + self.namespace = namespace + self.modules = modules + self._data = {} + + def stash(self): + self._data[self.namespace] = self.modules.pop(self.namespace, None) + + for module in list(self.modules.keys()): + if module.startswith(self.namespace + '.'): + self._data[module] = self.modules.pop(module) + + def pop(self): + self.modules.pop(self.namespace, None) + + for module in list(self.modules.keys()): + if module.startswith(self.namespace + '.'): + self.modules.pop(module) + + self.modules.update(self._data) + + +ssl_blocker = ImportBlocker('ssl', '_ssl') +module_stash = ModuleStash('urllib3') + + +class TestWithoutSSL(unittest.TestCase): + def setUp(self): + sys.modules.pop('ssl', None) + sys.modules.pop('_ssl', None) + + module_stash.stash() + sys.meta_path.insert(0, ssl_blocker) + + def tearDown(self): + assert sys.meta_path.pop(0) == ssl_blocker + module_stash.pop() + + +class TestImportWithoutSSL(TestWithoutSSL): + def test_cannot_import_ssl(self): + # python26 has neither contextmanagers (for assertRaises) nor + # importlib. + # 'import' inside 'lambda' is invalid syntax. + def import_ssl(): + import ssl + + self.assertRaises(ImportError, import_ssl) + + def test_import_urllib3(self): + import urllib3 diff --git a/test/test_poolmanager.py b/test/test_poolmanager.py index 754ee8a..6195d51 100644 --- a/test/test_poolmanager.py +++ b/test/test_poolmanager.py @@ -71,6 +71,22 @@ class TestPoolManager(unittest.TestCase): self.assertRaises(LocationValueError, p.connection_from_url, 'http://@') self.assertRaises(LocationValueError, p.connection_from_url, None) + def test_contextmanager(self): + with PoolManager(1) as p: + conn_pool = p.connection_from_url('http://google.com') + self.assertEqual(len(p.pools), 1) + conn = conn_pool._get_conn() + + self.assertEqual(len(p.pools), 0) + + self.assertRaises(ClosedPoolError, conn_pool._get_conn) + + conn_pool._put_conn(conn) + + self.assertRaises(ClosedPoolError, conn_pool._get_conn) + + self.assertEqual(len(p.pools), 0) + if __name__ == '__main__': unittest.main() diff --git a/test/test_response.py b/test/test_response.py index 7d67c93..2e2be0e 100644 --- a/test/test_response.py +++ b/test/test_response.py @@ -2,8 +2,12 @@ import unittest from io import BytesIO, BufferedReader +try: + import http.client as httplib +except ImportError: + import httplib from urllib3.response import HTTPResponse -from urllib3.exceptions import DecodeError +from urllib3.exceptions import DecodeError, ResponseNotChunked from base64 import b64decode @@ -73,6 +77,15 @@ class TestResponse(unittest.TestCase): 'content-encoding': 'deflate' }) + def test_reference_read(self): + fp = BytesIO(b'foo') + r = HTTPResponse(fp, preload_content=False) + + self.assertEqual(r.read(1), b'f') + self.assertEqual(r.read(2), b'oo') + self.assertEqual(r.read(), b'') + self.assertEqual(r.read(), b'') + def test_decode_deflate(self): import zlib data = zlib.compress(b'foo') @@ -102,6 +115,9 @@ class TestResponse(unittest.TestCase): self.assertEqual(r.read(3), b'') 
diff --git a/test/test_poolmanager.py b/test/test_poolmanager.py index 754ee8a..6195d51 100644 --- a/test/test_poolmanager.py +++ b/test/test_poolmanager.py @@ -71,6 +71,22 @@ class TestPoolManager(unittest.TestCase): self.assertRaises(LocationValueError, p.connection_from_url, 'http://@') self.assertRaises(LocationValueError, p.connection_from_url, None) + def test_contextmanager(self): + with PoolManager(1) as p: + conn_pool = p.connection_from_url('http://google.com') + self.assertEqual(len(p.pools), 1) + conn = conn_pool._get_conn() + + self.assertEqual(len(p.pools), 0) + + self.assertRaises(ClosedPoolError, conn_pool._get_conn) + + conn_pool._put_conn(conn) + + self.assertRaises(ClosedPoolError, conn_pool._get_conn) + + self.assertEqual(len(p.pools), 0) + if __name__ == '__main__': unittest.main() diff --git a/test/test_response.py b/test/test_response.py index 7d67c93..2e2be0e 100644 --- a/test/test_response.py +++ b/test/test_response.py @@ -2,8 +2,12 @@ import unittest from io import BytesIO, BufferedReader +try: + import http.client as httplib +except ImportError: + import httplib from urllib3.response import HTTPResponse -from urllib3.exceptions import DecodeError +from urllib3.exceptions import DecodeError, ResponseNotChunked from base64 import b64decode @@ -73,6 +77,15 @@ class TestResponse(unittest.TestCase): 'content-encoding': 'deflate' }) + def test_reference_read(self): + fp = BytesIO(b'foo') + r = HTTPResponse(fp, preload_content=False) + + self.assertEqual(r.read(1), b'f') + self.assertEqual(r.read(2), b'oo') + self.assertEqual(r.read(), b'') + self.assertEqual(r.read(), b'') + def test_decode_deflate(self): import zlib data = zlib.compress(b'foo') @@ -102,6 +115,9 @@ class TestResponse(unittest.TestCase): self.assertEqual(r.read(3), b'') self.assertEqual(r.read(1), b'f') self.assertEqual(r.read(2), b'oo') + self.assertEqual(r.read(), b'') + self.assertEqual(r.read(), b'') + def test_chunked_decoding_deflate2(self): import zlib @@ -116,6 +132,9 @@ class TestResponse(unittest.TestCase): self.assertEqual(r.read(1), b'') self.assertEqual(r.read(1), b'f') self.assertEqual(r.read(2), b'oo') + self.assertEqual(r.read(), b'') + self.assertEqual(r.read(), b'') + def test_chunked_decoding_gzip(self): import zlib @@ -130,6 +149,9 @@ class TestResponse(unittest.TestCase): self.assertEqual(r.read(11), b'') self.assertEqual(r.read(1), b'f') self.assertEqual(r.read(2), b'oo') + self.assertEqual(r.read(), b'') + self.assertEqual(r.read(), b'') + def test_body_blob(self): resp = HTTPResponse(b'foo') @@ -138,10 +160,6 @@ class TestResponse(unittest.TestCase): def test_io(self): import socket - try: - from http.client import HTTPResponse as OldHTTPResponse - except: - from httplib import HTTPResponse as OldHTTPResponse fp = BytesIO(b'foo') resp = HTTPResponse(fp, preload_content=False) @@ -156,7 +174,7 @@ class TestResponse(unittest.TestCase): # Try closing with an `httplib.HTTPResponse`, because it has an # `isclosed` method. - hlr = OldHTTPResponse(socket.socket()) + hlr = httplib.HTTPResponse(socket.socket()) resp2 = HTTPResponse(hlr, preload_content=False) self.assertEqual(resp2.closed, False) resp2.close() @@ -388,11 +406,227 @@ class TestResponse(unittest.TestCase): self.assertEqual(next(stream), b'o') self.assertRaises(StopIteration, next, stream) + def test_mock_transfer_encoding_chunked(self): + stream = [b"fo", b"o", b"bar"] + fp = MockChunkedEncodingResponse(stream) + r = httplib.HTTPResponse(MockSock) + r.fp = fp + resp = HTTPResponse(r, preload_content=False, headers={'transfer-encoding': 'chunked'}) + + i = 0 + for c in resp.stream(): + self.assertEqual(c, stream[i]) + i += 1 + + def test_mock_gzipped_transfer_encoding_chunked_decoded(self): + """Show that we can decode the gzipped and chunked body.""" + def stream(): + # Set up a generator to chunk the gzipped body + import zlib + compress = zlib.compressobj(6, zlib.DEFLATED, 16 + zlib.MAX_WBITS) + data = compress.compress(b'foobar') + data += compress.flush() + for i in range(0, len(data), 2): + yield data[i:i+2] + + fp = MockChunkedEncodingResponse(list(stream())) + r = httplib.HTTPResponse(MockSock) + r.fp = fp + headers = {'transfer-encoding': 'chunked', 'content-encoding': 'gzip'} + resp = HTTPResponse(r, preload_content=False, headers=headers) + + data = b'' + for c in resp.stream(decode_content=True): + data += c + + self.assertEqual(b'foobar', data) + + def test_mock_transfer_encoding_chunked_custom_read(self): + stream = [b"foooo", b"bbbbaaaaar"] + fp = MockChunkedEncodingResponse(stream) + r = httplib.HTTPResponse(MockSock) + r.fp = fp + r.chunked = True + r.chunk_left = None + resp = HTTPResponse(r, preload_content=False, headers={'transfer-encoding': 'chunked'}) + expected_response = [b'fo', b'oo', b'o', b'bb', b'bb', b'aa', b'aa', b'ar'] + response = list(resp.read_chunked(2)) + if getattr(self, "assertListEqual", False): + self.assertListEqual(expected_response, response) + else: + for index, item in enumerate(response): + v = expected_response[index] + self.assertEqual(item, v) + + def test_mock_transfer_encoding_chunked_unlmtd_read(self): + stream = [b"foooo", b"bbbbaaaaar"] + fp = MockChunkedEncodingResponse(stream) + r = httplib.HTTPResponse(MockSock) + r.fp = fp + r.chunked = True + r.chunk_left = None + resp = HTTPResponse(r, preload_content=False,
headers={'transfer-encoding': 'chunked'}) + if getattr(self, "assertListEqual", False): + self.assertListEqual(stream, list(resp.read_chunked())) + else: + for index, item in enumerate(resp.read_chunked()): + v = stream[index] + self.assertEqual(item, v) + + def test_read_not_chunked_response_as_chunks(self): + fp = BytesIO(b'foo') + resp = HTTPResponse(fp, preload_content=False) + r = resp.read_chunked() + self.assertRaises(ResponseNotChunked, next, r) + + def test_invalid_chunks(self): + stream = [b"foooo", b"bbbbaaaaar"] + fp = MockChunkedInvalidEncoding(stream) + r = httplib.HTTPResponse(MockSock) + r.fp = fp + r.chunked = True + r.chunk_left = None + resp = HTTPResponse(r, preload_content=False, headers={'transfer-encoding': 'chunked'}) + self.assertRaises(httplib.IncompleteRead, next, resp.read_chunked()) + + def test_chunked_response_without_crlf_on_end(self): + stream = [b"foo", b"bar", b"baz"] + fp = MockChunkedEncodingWithoutCRLFOnEnd(stream) + r = httplib.HTTPResponse(MockSock) + r.fp = fp + r.chunked = True + r.chunk_left = None + resp = HTTPResponse(r, preload_content=False, headers={'transfer-encoding': 'chunked'}) + if getattr(self, "assertListEqual", False): + self.assertListEqual(stream, list(resp.stream())) + else: + for index, item in enumerate(resp.stream()): + v = stream[index] + self.assertEqual(item, v) + + def test_chunked_response_with_extensions(self): + stream = [b"foo", b"bar"] + fp = MockChunkedEncodingWithExtensions(stream) + r = httplib.HTTPResponse(MockSock) + r.fp = fp + r.chunked = True + r.chunk_left = None + resp = HTTPResponse(r, preload_content=False, headers={'transfer-encoding': 'chunked'}) + if getattr(self, "assertListEqual", False): + self.assertListEqual(stream, list(resp.stream())) + else: + for index, item in enumerate(resp.stream()): + v = stream[index] + self.assertEqual(item, v) + def test_get_case_insensitive_headers(self): headers = {'host': 'example.com'} r = HTTPResponse(headers=headers) self.assertEqual(r.headers.get('host'), 'example.com') self.assertEqual(r.headers.get('Host'), 'example.com') + +class MockChunkedEncodingResponse(object): + + def __init__(self, content): + """ + content: collection of str, each str is a chunk in response + """ + self.content = content + self.index = 0 # This class iterates over self.content. + self.closed = False + self.cur_chunk = b'' + self.chunks_exhausted = False + + @staticmethod + def _encode_chunk(chunk): + # In the general case, we can't decode the chunk to unicode + length = '%X\r\n' % len(chunk) + return length.encode() + chunk + b'\r\n' + + def _pop_new_chunk(self): + if self.chunks_exhausted: + return b"" + try: + chunk = self.content[self.index] + except IndexError: + chunk = b'' + self.chunks_exhausted = True + else: + self.index += 1 + chunk = self._encode_chunk(chunk) + if not isinstance(chunk, bytes): + chunk = chunk.encode() + return chunk + + def pop_current_chunk(self, amt=-1, till_crlf=False): + if amt > 0 and till_crlf: + raise ValueError("Can't specify amt and till_crlf.") + if len(self.cur_chunk) <= 0: + self.cur_chunk = self._pop_new_chunk() + if till_crlf: + try: + i = self.cur_chunk.index(b"\r\n") + except ValueError: + # No CRLF in current chunk -- probably caused by encoder. 
+ self.cur_chunk = b"" + return b"" + else: + chunk_part = self.cur_chunk[:i+2] + self.cur_chunk = self.cur_chunk[i+2:] + return chunk_part + elif amt <= -1: + chunk_part = self.cur_chunk + self.cur_chunk = b'' + return chunk_part + else: + try: + chunk_part = self.cur_chunk[:amt] + except IndexError: + chunk_part = self.cur_chunk + self.cur_chunk = b'' + else: + self.cur_chunk = self.cur_chunk[amt:] + return chunk_part + + def readline(self): + return self.pop_current_chunk(till_crlf=True) + + def read(self, amt=-1): + return self.pop_current_chunk(amt) + + def flush(self): + # Python 3 wants this method. + pass + + def close(self): + self.closed = True + + +class MockChunkedInvalidEncoding(MockChunkedEncodingResponse): + + def _encode_chunk(self, chunk): + return 'ZZZ\r\n%s\r\n' % chunk.decode() + + +class MockChunkedEncodingWithoutCRLFOnEnd(MockChunkedEncodingResponse): + + def _encode_chunk(self, chunk): + return '%X\r\n%s%s' % (len(chunk), chunk.decode(), + "\r\n" if len(chunk) > 0 else "") + + +class MockChunkedEncodingWithExtensions(MockChunkedEncodingResponse): + + def _encode_chunk(self, chunk): + return '%X;asd=qwe\r\n%s\r\n' % (len(chunk), chunk.decode()) + + +class MockSock(object): + @classmethod + def makefile(cls, *args, **kwargs): + return + + if __name__ == '__main__': unittest.main() diff --git a/test/test_util.py b/test/test_util.py index c850d91..19ba57e 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -15,7 +15,10 @@ from urllib3.util.url import ( split_first, Url, ) -from urllib3.util.ssl_ import resolve_cert_reqs, ssl_wrap_socket +from urllib3.util.ssl_ import ( + resolve_cert_reqs, + ssl_wrap_socket, +) from urllib3.exceptions import ( LocationParseError, TimeoutStateError, @@ -94,6 +97,7 @@ class TestUtil(unittest.TestCase): parse_url_host_map = { 'http://google.com/mail': Url('http', host='google.com', path='/mail'), 'http://google.com/mail/': Url('http', host='google.com', path='/mail/'), + 'http://google.com/mail': Url('http', host='google.com', path='mail'), 'google.com/mail': Url(host='google.com', path='/mail'), 'http://google.com/': Url('http', host='google.com', path='/'), 'http://google.com': Url('http', host='google.com'), diff --git a/test/with_dummyserver/test_connectionpool.py b/test/with_dummyserver/test_connectionpool.py index cc0f011..d6cb162 100644 --- a/test/with_dummyserver/test_connectionpool.py +++ b/test/with_dummyserver/test_connectionpool.py @@ -4,6 +4,7 @@ import socket import sys import unittest import time +import warnings import mock @@ -35,6 +36,7 @@ from urllib3.util.timeout import Timeout import tornado from dummyserver.testcase import HTTPDummyServerTestCase +from dummyserver.server import NoIPv6Warning from nose.tools import timed @@ -597,7 +599,11 @@ class TestConnectionPool(HTTPDummyServerTestCase): self.assertRaises(MaxRetryError, pool.request, 'GET', '/test', retries=2) def test_source_address(self): - for addr in VALID_SOURCE_ADDRESSES: + for addr, is_ipv6 in VALID_SOURCE_ADDRESSES: + if is_ipv6 and not socket.has_ipv6: + warnings.warn("No IPv6 support: skipping.", + NoIPv6Warning) + continue pool = HTTPConnectionPool(self.host, self.port, source_address=addr, retries=False) r = pool.request('GET', '/source_address') @@ -612,13 +618,34 @@ class TestConnectionPool(HTTPDummyServerTestCase): self.assertRaises(ProtocolError, pool.request, 'GET', '/source_address') - @onlyPy3 - def test_httplib_headers_case_insensitive(self): - HEADERS = {'Content-Length': '0', 'Content-type': 'text/plain', - 'Server': 'TornadoServer/%s' % 
tornado.version} - r = self.pool.request('GET', '/specific_method', - fields={'method': 'GET'}) - self.assertEqual(HEADERS, dict(r.headers.items())) # to preserve case sensitivity + def test_stream_keepalive(self): + x = 2 + + for _ in range(x): + response = self.pool.request( + 'GET', + '/chunked', + headers={ + 'Connection': 'keep-alive', + }, + preload_content=False, + retries=False, + ) + for chunk in response.stream(): + self.assertEqual(chunk, b'123') + + self.assertEqual(self.pool.num_connections, 1) + self.assertEqual(self.pool.num_requests, x) + + def test_chunked_gzip(self): + response = self.pool.request( + 'GET', + '/chunked_gzip', + preload_content=False, + decode_content=True, + ) + + self.assertEqual(b'123' * 4, response.read()) class TestRetry(HTTPDummyServerTestCase): diff --git a/test/with_dummyserver/test_https.py b/test/with_dummyserver/test_https.py index 16ca589..992b8ef 100644 --- a/test/with_dummyserver/test_https.py +++ b/test/with_dummyserver/test_https.py @@ -30,10 +30,17 @@ from urllib3.exceptions import ( ConnectTimeoutError, InsecureRequestWarning, SystemTimeWarning, + InsecurePlatformWarning, ) +from urllib3.packages import six from urllib3.util.timeout import Timeout +ResourceWarning = getattr( + six.moves.builtins, + 'ResourceWarning', type('ResourceWarning', (), {})) + + log = logging.getLogger('urllib3.connectionpool') log.setLevel(logging.NOTSET) log.addHandler(logging.StreamHandler(sys.stdout)) @@ -64,7 +71,14 @@ class TestHTTPS(HTTPSDummyServerTestCase): with mock.patch('warnings.warn') as warn: r = https_pool.request('GET', '/') self.assertEqual(r.status, 200) - self.assertFalse(warn.called, warn.call_args_list) + + if sys.version_info >= (2, 7, 9): + self.assertFalse(warn.called, warn.call_args_list) + else: + self.assertTrue(warn.called) + call, = warn.call_args_list + error = call[0][1] + self.assertEqual(error, InsecurePlatformWarning) def test_invalid_common_name(self): https_pool = HTTPSConnectionPool('127.0.0.1', self.port, @@ -137,8 +151,11 @@ class TestHTTPS(HTTPSDummyServerTestCase): self.assertEqual(r.status, 200) self.assertTrue(warn.called) - call, = warn.call_args_list - category = call[0][1] + calls = warn.call_args_list + if sys.version_info >= (2, 7, 9): + category = calls[0][0][1] + else: + category = calls[1][0][1] self.assertEqual(category, InsecureRequestWarning) @requires_network @@ -202,6 +219,16 @@ class TestHTTPS(HTTPSDummyServerTestCase): '7A:F2:8A:D7:1E:07:33:67:DE' https_pool.request('GET', '/') + def test_assert_fingerprint_sha256(self): + https_pool = HTTPSConnectionPool('localhost', self.port, + cert_reqs='CERT_REQUIRED', + ca_certs=DEFAULT_CA) + + https_pool.assert_fingerprint = ('9A:29:9D:4F:47:85:1C:51:23:F5:9A:A3:' + '0F:5A:EF:96:F9:2E:3C:22:2E:FC:E8:BC:' + '0E:73:90:37:ED:3B:AA:AB') + https_pool.request('GET', '/') + def test_assert_invalid_fingerprint(self): https_pool = HTTPSConnectionPool('127.0.0.1', self.port, cert_reqs='CERT_REQUIRED', @@ -240,6 +267,15 @@ class TestHTTPS(HTTPSDummyServerTestCase): '7A:F2:8A:D7:1E:07:33:67:DE' https_pool.request('GET', '/') + def test_good_fingerprint_and_hostname_mismatch(self): + https_pool = HTTPSConnectionPool('127.0.0.1', self.port, + cert_reqs='CERT_REQUIRED', + ca_certs=DEFAULT_CA) + + https_pool.assert_fingerprint = 'CC:45:6A:90:82:F7FF:C0:8218:8e:' \ + '7A:F2:8A:D7:1E:07:33:67:DE' + https_pool.request('GET', '/') + @requires_network def test_https_timeout(self): timeout = Timeout(connect=0.001) @@ -332,10 +368,8 @@ class TestHTTPS(HTTPSDummyServerTestCase): def 
test_ssl_correct_system_time(self): self._pool.cert_reqs = 'CERT_REQUIRED' self._pool.ca_certs = DEFAULT_CA - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter('always') - self._pool.request('GET', '/') + w = self._request_without_resource_warnings('GET', '/') self.assertEqual([], w) def test_ssl_wrong_system_time(self): @@ -344,9 +378,7 @@ class TestHTTPS(HTTPSDummyServerTestCase): with mock.patch('urllib3.connection.datetime') as mock_date: mock_date.date.today.return_value = datetime.date(1970, 1, 1) - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter('always') - self._pool.request('GET', '/') + w = self._request_without_resource_warnings('GET', '/') self.assertEqual(len(w), 1) warning = w[0] @@ -354,6 +386,13 @@ class TestHTTPS(HTTPSDummyServerTestCase): self.assertEqual(SystemTimeWarning, warning.category) self.assertTrue(str(RECENT_DATE) in warning.message.args[0]) + def _request_without_resource_warnings(self, method, url): + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('always') + self._pool.request(method, url) + + return [x for x in w if not isinstance(x.message, ResourceWarning)] + class TestHTTPS_TLSv1(HTTPSDummyServerTestCase): certs = DEFAULT_CERTS.copy() diff --git a/test/with_dummyserver/test_no_ssl.py b/test/with_dummyserver/test_no_ssl.py new file mode 100644 index 0000000..f266d49 --- /dev/null +++ b/test/with_dummyserver/test_no_ssl.py @@ -0,0 +1,29 @@ +""" +Test connections without the builtin ssl module + +Note: Import urllib3 inside the test functions to get the importblocker to work +""" +from ..test_no_ssl import TestWithoutSSL + +from dummyserver.testcase import ( + HTTPDummyServerTestCase, HTTPSDummyServerTestCase) + + +class TestHTTPWithoutSSL(HTTPDummyServerTestCase, TestWithoutSSL): + def test_simple(self): + import urllib3 + + pool = urllib3.HTTPConnectionPool(self.host, self.port) + r = pool.request('GET', '/') + self.assertEqual(r.status, 200, r.data) + + +class TestHTTPSWithoutSSL(HTTPSDummyServerTestCase, TestWithoutSSL): + def test_simple(self): + import urllib3 + + pool = urllib3.HTTPSConnectionPool(self.host, self.port) + try: + pool.request('GET', '/') + except urllib3.exceptions.SSLError as e: + self.assertTrue('SSL module is not available' in str(e)) diff --git a/test/with_dummyserver/test_poolmanager.py b/test/with_dummyserver/test_poolmanager.py index 52ff974..7e51c73 100644 --- a/test/with_dummyserver/test_poolmanager.py +++ b/test/with_dummyserver/test_poolmanager.py @@ -6,6 +6,7 @@ from dummyserver.testcase import (HTTPDummyServerTestCase, from urllib3.poolmanager import PoolManager from urllib3.connectionpool import port_by_scheme from urllib3.exceptions import MaxRetryError, SSLError +from urllib3.util.retry import Retry class TestPoolManager(HTTPDummyServerTestCase): @@ -78,6 +79,34 @@ class TestPoolManager(HTTPDummyServerTestCase): self.assertEqual(r._pool.host, self.host_alt) + def test_too_many_redirects(self): + http = PoolManager() + + try: + r = http.request('GET', '%s/redirect' % self.base_url, + fields={'target': '%s/redirect?target=%s/' % (self.base_url, self.base_url)}, + retries=1) + self.fail("Failed to raise MaxRetryError exception, returned %r" % r.status) + except MaxRetryError: + pass + + try: + r = http.request('GET', '%s/redirect' % self.base_url, + fields={'target': '%s/redirect?target=%s/' % (self.base_url, self.base_url)}, + retries=Retry(total=None, redirect=1)) + self.fail("Failed to raise MaxRetryError exception, returned %r" % r.status) + except 
MaxRetryError: + pass + + def test_raise_on_redirect(self): + http = PoolManager() + + r = http.request('GET', '%s/redirect' % self.base_url, + fields={'target': '%s/redirect?target=%s/' % (self.base_url, self.base_url)}, + retries=Retry(total=None, redirect=1, raise_on_redirect=False)) + + self.assertEqual(r.status, 303) + def test_missing_port(self): # Can a URL that lacks an explicit port like ':80' succeed, or # will all such URLs fail with an error? diff --git a/test/with_dummyserver/test_socketlevel.py b/test/with_dummyserver/test_socketlevel.py index c1ef1be..6c99653 100644 --- a/test/with_dummyserver/test_socketlevel.py +++ b/test/with_dummyserver/test_socketlevel.py @@ -18,6 +18,8 @@ from dummyserver.testcase import SocketDummyServerTestCase from dummyserver.server import ( DEFAULT_CERTS, DEFAULT_CA, get_unreachable_address) +from .. import onlyPy3 + from nose.plugins.skip import SkipTest from threading import Event import socket @@ -44,6 +46,7 @@ class TestCookies(SocketDummyServerTestCase): pool = HTTPConnectionPool(self.host, self.port) r = pool.request('GET', '/', retries=0) self.assertEqual(r.headers, {'set-cookie': 'foo=1, bar=1'}) + self.assertEqual(r.headers.getlist('set-cookie'), ['foo=1', 'bar=1']) class TestSNI(SocketDummyServerTestCase): @@ -521,6 +524,43 @@ class TestSSL(SocketDummyServerTestCase): finally: timed_out.set() + def test_ssl_failed_fingerprint_verification(self): + def socket_handler(listener): + for i in range(2): + sock = listener.accept()[0] + ssl_sock = ssl.wrap_socket(sock, + server_side=True, + keyfile=DEFAULT_CERTS['keyfile'], + certfile=DEFAULT_CERTS['certfile'], + ca_certs=DEFAULT_CA) + + ssl_sock.send(b'HTTP/1.1 200 OK\r\n' + b'Content-Type: text/plain\r\n' + b'Content-Length: 5\r\n\r\n' + b'Hello') + + ssl_sock.close() + sock.close() + + self._start_server(socket_handler) + # GitHub's fingerprint. Valid, but not matching. 
+ fingerprint = ('A0:C4:A7:46:00:ED:A7:2D:C0:BE:CB' + ':9A:8C:B6:07:CA:58:EE:74:5E') + + def request(): + try: + pool = HTTPSConnectionPool(self.host, self.port, + assert_fingerprint=fingerprint) + response = pool.urlopen('GET', '/', preload_content=False, + timeout=Timeout(connect=1, read=0.001)) + response.read() + finally: + pool.close() + + self.assertRaises(SSLError, request) + # Should not hang, see https://github.com/shazow/urllib3/issues/529 + self.assertRaises(SSLError, request) + def consume_socket(sock, chunks=65536): while not sock.recv(chunks).endswith(b'\r\n\r\n'): @@ -560,3 +600,49 @@ class TestErrorWrapping(SocketDummyServerTestCase): self._start_server(handler) pool = HTTPConnectionPool(self.host, self.port, retries=False) self.assertRaises(ProtocolError, pool.request, 'GET', '/') + +class TestHeaders(SocketDummyServerTestCase): + + @onlyPy3 + def test_httplib_headers_case_insensitive(self): + handler = create_response_handler( + b'HTTP/1.1 200 OK\r\n' + b'Content-Length: 0\r\n' + b'Content-type: text/plain\r\n' + b'\r\n' + ) + self._start_server(handler) + pool = HTTPConnectionPool(self.host, self.port, retries=False) + HEADERS = {'Content-Length': '0', 'Content-type': 'text/plain'} + r = pool.request('GET', '/') + self.assertEqual(HEADERS, dict(r.headers.items())) # to preserve case sensitivity + + +class TestHEAD(SocketDummyServerTestCase): + def test_chunked_head_response_does_not_hang(self): + handler = create_response_handler( + b'HTTP/1.1 200 OK\r\n' + b'Transfer-Encoding: chunked\r\n' + b'Content-type: text/plain\r\n' + b'\r\n' + ) + self._start_server(handler) + pool = HTTPConnectionPool(self.host, self.port, retries=False) + r = pool.request('HEAD', '/', timeout=1, preload_content=False) + + # stream will use the read_chunked method here. + self.assertEqual([], list(r.stream())) + + def test_empty_head_response_does_not_hang(self): + handler = create_response_handler( + b'HTTP/1.1 200 OK\r\n' + b'Content-Length: 256\r\n' + b'Content-type: text/plain\r\n' + b'\r\n' + ) + self._start_server(handler) + pool = HTTPConnectionPool(self.host, self.port, retries=False) + r = pool.request('HEAD', '/', timeout=1, preload_content=False) + + # stream will use the read method here. + self.assertEqual([], list(r.stream())) diff --git a/urllib3.egg-info/PKG-INFO b/urllib3.egg-info/PKG-INFO index 7b5cf18..123e7be 100644 --- a/urllib3.egg-info/PKG-INFO +++ b/urllib3.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 1.1 Name: urllib3 -Version: 1.10 +Version: 1.10.4 Summary: HTTP library with thread-safe connection pooling, file post, and more. Home-page: http://urllib3.readthedocs.org/ Author: Andrey Petrov @@ -156,6 +156,69 @@ Description: ======= Changes ======= + 1.10.4 (2015-05-03) + +++++++++++++++++++ + + * Migrate tests to Tornado 4. (Issue #594) + + * Append default warning configuration rather than overwrite. + (Issue #603) + + * Fix streaming decoding regression. (Issue #595) + + * Fix chunked requests losing state across keep-alive connections. + (Issue #599) + + * Fix hanging when chunked HEAD response has no body. (Issue #605) + + + 1.10.3 (2015-04-21) + +++++++++++++++++++ + + * Emit ``InsecurePlatformWarning`` when SSLContext object is missing. + (Issue #558) + + * Fix regression of duplicate header keys being discarded. + (Issue #563) + + * ``Response.stream()`` returns a generator for chunked responses. + (Issue #560) + + * Set upper-bound timeout when waiting for a socket in PyOpenSSL. 
diff --git a/urllib3.egg-info/PKG-INFO b/urllib3.egg-info/PKG-INFO
index 7b5cf18..123e7be 100644
--- a/urllib3.egg-info/PKG-INFO
+++ b/urllib3.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 1.1
 Name: urllib3
-Version: 1.10
+Version: 1.10.4
 Summary: HTTP library with thread-safe connection pooling, file post, and more.
 Home-page: http://urllib3.readthedocs.org/
 Author: Andrey Petrov
@@ -156,6 +156,69 @@ Description: =======
         Changes
         =======
 
+        1.10.4 (2015-05-03)
+        +++++++++++++++++++
+
+        * Migrate tests to Tornado 4. (Issue #594)
+
+        * Append default warning configuration rather than overwrite.
+          (Issue #603)
+
+        * Fix streaming decoding regression. (Issue #595)
+
+        * Fix chunked requests losing state across keep-alive connections.
+          (Issue #599)
+
+        * Fix hanging when chunked HEAD response has no body. (Issue #605)
+
+
+        1.10.3 (2015-04-21)
+        +++++++++++++++++++
+
+        * Emit ``InsecurePlatformWarning`` when SSLContext object is missing.
+          (Issue #558)
+
+        * Fix regression of duplicate header keys being discarded.
+          (Issue #563)
+
+        * ``Response.stream()`` returns a generator for chunked responses.
+          (Issue #560)
+
+        * Set upper-bound timeout when waiting for a socket in PyOpenSSL.
+          (Issue #585)
+
+        * Work on platforms without `ssl` module for plain HTTP requests.
+          (Issue #587)
+
+        * Stop relying on the stdlib's default cipher list. (Issue #588)
+
+
+        1.10.2 (2015-02-25)
+        +++++++++++++++++++
+
+        * Fix file descriptor leakage on retries. (Issue #548)
+
+        * Removed RC4 from default cipher list. (Issue #551)
+
+        * Header performance improvements. (Issue #544)
+
+        * Fix PoolManager not obeying redirect retry settings. (Issue #553)
+
+
+        1.10.1 (2015-02-10)
+        +++++++++++++++++++
+
+        * Pools can be used as context managers. (Issue #545)
+
+        * Don't re-use connections which experienced an SSLError. (Issue #529)
+
+        * Don't fail when gzip decoding an empty stream. (Issue #535)
+
+        * Add sha256 support for fingerprint verification. (Issue #540)
+
+        * Fixed handling of header values containing commas. (Issue #533)
+
+
         1.10 (2014-12-14)
         +++++++++++++++++
@@ -197,7 +260,7 @@ Description: =======
         * Fixed packaging issues of some development-related files not
           getting included. (Issue #440)
-        
+
         * Allow performing *only* fingerprint verification. (Issue #444)
 
         * Emit ``SecurityWarning`` if system clock is waaay off. (Issue #445)
diff --git a/urllib3.egg-info/SOURCES.txt b/urllib3.egg-info/SOURCES.txt
index 6cb0fcf..16d8476 100644
--- a/urllib3.egg-info/SOURCES.txt
+++ b/urllib3.egg-info/SOURCES.txt
@@ -48,6 +48,7 @@ test/test_connectionpool.py
 test/test_exceptions.py
 test/test_fields.py
 test/test_filepost.py
+test/test_no_ssl.py
 test/test_poolmanager.py
 test/test_proxymanager.py
 test/test_response.py
@@ -58,6 +59,7 @@ test/contrib/test_pyopenssl.py
 test/with_dummyserver/__init__.py
 test/with_dummyserver/test_connectionpool.py
 test/with_dummyserver/test_https.py
+test/with_dummyserver/test_no_ssl.py
 test/with_dummyserver/test_poolmanager.py
 test/with_dummyserver/test_proxy_poolmanager.py
 test/with_dummyserver/test_socketlevel.py
diff --git a/urllib3/__init__.py b/urllib3/__init__.py
index 4f9d4a7..f48ac4a 100644
--- a/urllib3/__init__.py
+++ b/urllib3/__init__.py
@@ -4,7 +4,7 @@ urllib3 - Thread-safe connection pooling and re-using.
 
 __author__ = 'Andrey Petrov (andrey.petrov@shazow.net)'
 __license__ = 'MIT'
-__version__ = '1.10'
+__version__ = '1.10.4'
 
 
 from .connectionpool import (
@@ -55,9 +55,12 @@ def add_stderr_logger(level=logging.DEBUG):
 
 del NullHandler
 
-# Set security warning to always go off by default.
 import warnings
-warnings.simplefilter('always', exceptions.SecurityWarning)
+# SecurityWarnings always go off by default.
+warnings.simplefilter('always', exceptions.SecurityWarning, append=True)
+# InsecurePlatformWarnings don't vary between requests, so keep the 'default' filter.
+warnings.simplefilter('default', exceptions.InsecurePlatformWarning,
+                      append=True)
 
 def disable_warnings(category=exceptions.HTTPWarning):
     """
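Because the filters are now appended rather than prepended, filters configured by the user or via ``-W`` keep precedence. Silencing the new warning explicitly still works the usual way; a sketch:

    >>> import urllib3
    >>> urllib3.disable_warnings(urllib3.exceptions.InsecurePlatformWarning)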
diff --git a/urllib3/_collections.py b/urllib3/_collections.py
index 784342a..279416c 100644
--- a/urllib3/_collections.py
+++ b/urllib3/_collections.py
@@ -1,7 +1,7 @@
 from collections import Mapping, MutableMapping
 try:
     from threading import RLock
-except ImportError: # Platform-specific: No threads available
+except ImportError:  # Platform-specific: No threads available
     class RLock:
         def __enter__(self):
             pass
@@ -10,11 +10,11 @@ except ImportError:  # Platform-specific: No threads available
             pass
 
 
-try: # Python 2.7+
+try:  # Python 2.7+
     from collections import OrderedDict
 except ImportError:
     from .packages.ordered_dict import OrderedDict
-from .packages.six import iterkeys, itervalues
+from .packages.six import iterkeys, itervalues, PY3
 
 
 __all__ = ['RecentlyUsedContainer', 'HTTPHeaderDict']
@@ -97,7 +97,14 @@ class RecentlyUsedContainer(MutableMapping):
             return list(iterkeys(self._container))
 
 
-class HTTPHeaderDict(MutableMapping):
+_dict_setitem = dict.__setitem__
+_dict_getitem = dict.__getitem__
+_dict_delitem = dict.__delitem__
+_dict_contains = dict.__contains__
+_dict_setdefault = dict.setdefault
+
+
+class HTTPHeaderDict(dict):
     """
     :param headers:
         An iterable of field-value pairs. Must not contain multiple field names
@@ -129,25 +136,75 @@ class HTTPHeaderDict(MutableMapping):
     'foo=bar, baz=quxx'
     >>> headers['Content-Length']
     '7'
-
-    If you want to access the raw headers with their original casing
-    for debugging purposes you can access the private ``._data`` attribute
-    which is a normal python ``dict`` that maps the case-insensitive key to a
-    list of tuples stored as (case-sensitive-original-name, value). Using the
-    structure from above as our example:
-
-    >>> headers._data
-    {'set-cookie': [('Set-Cookie', 'foo=bar'), ('set-cookie', 'baz=quxx')],
-    'content-length': [('content-length', '7')]}
     """
 
     def __init__(self, headers=None, **kwargs):
-        self._data = {}
-        if headers is None:
-            headers = {}
-        self.update(headers, **kwargs)
+        dict.__init__(self)
+        if headers is not None:
+            if isinstance(headers, HTTPHeaderDict):
+                self._copy_from(headers)
+            else:
+                self.extend(headers)
+        if kwargs:
+            self.extend(kwargs)
+
+    def __setitem__(self, key, val):
+        return _dict_setitem(self, key.lower(), (key, val))
+
+    def __getitem__(self, key):
+        val = _dict_getitem(self, key.lower())
+        return ', '.join(val[1:])
+
+    def __delitem__(self, key):
+        return _dict_delitem(self, key.lower())
 
-    def add(self, key, value):
+    def __contains__(self, key):
+        return _dict_contains(self, key.lower())
+
+    def __eq__(self, other):
+        if not isinstance(other, Mapping) and not hasattr(other, 'keys'):
+            return False
+        if not isinstance(other, type(self)):
+            other = type(self)(other)
+        return dict((k1, self[k1]) for k1 in self) == dict((k2, other[k2]) for k2 in other)
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
+
+    values = MutableMapping.values
+    get = MutableMapping.get
+    update = MutableMapping.update
+
+    if not PY3:  # Python 2
+        iterkeys = MutableMapping.iterkeys
+        itervalues = MutableMapping.itervalues
+
+    __marker = object()
+
+    def pop(self, key, default=__marker):
+        '''D.pop(k[,d]) -> v, remove specified key and return the corresponding value.
+        If key is not found, d is returned if given, otherwise KeyError is raised.
+        '''
+        # Using the MutableMapping function directly fails due to the private marker.
+        # Using ordinary dict.pop would expose the internal structures.
+        # So let's reinvent the wheel.
+        try:
+            value = self[key]
+        except KeyError:
+            if default is self.__marker:
+                raise
+            return default
+        else:
+            del self[key]
+            return value
+
+    def discard(self, key):
+        try:
+            del self[key]
+        except KeyError:
+            pass
+
+    def add(self, key, val):
         """Adds a (name, value) pair, doesn't overwrite the value if it already
         exists.
 
@@ -156,43 +213,111 @@ class HTTPHeaderDict(MutableMapping):
         >>> headers['foo']
         'bar, baz'
         """
-        self._data.setdefault(key.lower(), []).append((key, value))
+        key_lower = key.lower()
+        new_vals = key, val
+        # Keep the common case aka no item present as fast as possible
+        vals = _dict_setdefault(self, key_lower, new_vals)
+        if new_vals is not vals:
+            # new_vals was not inserted, as there was a previous one
+            if isinstance(vals, list):
+                # If already several items got inserted, we have a list
+                vals.append(val)
+            else:
+                # vals should be a tuple then, i.e. only one item so far
+                # Need to convert the tuple to list for further extension
+                _dict_setitem(self, key_lower, [vals[0], vals[1], val])
+
+    def extend(self, *args, **kwargs):
+        """Generic import function for any type of header-like object.
+        Adapted version of MutableMapping.update in order to insert items
+        with self.add instead of self.__setitem__
+        """
+        if len(args) > 1:
+            raise TypeError("extend() takes at most 1 positional "
+                            "arguments ({} given)".format(len(args)))
+        other = args[0] if len(args) >= 1 else ()
+
+        if isinstance(other, HTTPHeaderDict):
+            for key, val in other.iteritems():
+                self.add(key, val)
+        elif isinstance(other, Mapping):
+            for key in other:
+                self.add(key, other[key])
+        elif hasattr(other, "keys"):
+            for key in other.keys():
+                self.add(key, other[key])
+        else:
+            for key, value in other:
+                self.add(key, value)
+
+        for key, value in kwargs.items():
+            self.add(key, value)
 
     def getlist(self, key):
         """Returns a list of all the values for the named field.
         Returns an empty list if the key doesn't exist."""
-        return self[key].split(', ') if key in self else []
-
-    def copy(self):
-        h = HTTPHeaderDict()
-        for key in self._data:
-            for rawkey, value in self._data[key]:
-                h.add(rawkey, value)
-        return h
-
-    def __eq__(self, other):
-        if not isinstance(other, Mapping):
-            return False
-        other = HTTPHeaderDict(other)
-        return dict((k1, self[k1]) for k1 in self._data) == \
-            dict((k2, other[k2]) for k2 in other._data)
-
-    def __getitem__(self, key):
-        values = self._data[key.lower()]
-        return ', '.join(value[1] for value in values)
-
-    def __setitem__(self, key, value):
-        self._data[key.lower()] = [(key, value)]
+        try:
+            vals = _dict_getitem(self, key.lower())
+        except KeyError:
+            return []
+        else:
+            if isinstance(vals, tuple):
+                return [vals[1]]
+            else:
+                return vals[1:]
+
+    # Backwards compatibility for httplib
+    getheaders = getlist
+    getallmatchingheaders = getlist
+    iget = getlist
 
-    def __delitem__(self, key):
-        del self._data[key.lower()]
+    def __repr__(self):
+        return "%s(%s)" % (type(self).__name__, dict(self.itermerged()))
 
-    def __len__(self):
-        return len(self._data)
+    def _copy_from(self, other):
+        for key in other:
+            val = _dict_getitem(other, key)
+            if isinstance(val, list):
+                # Don't need to convert tuples
+                val = list(val)
+            _dict_setitem(self, key, val)
 
-    def __iter__(self):
-        for headers in itervalues(self._data):
-            yield headers[0][0]
-
-    def __repr__(self):
-        return '%s(%r)' % (self.__class__.__name__, dict(self.items()))
+    def copy(self):
+        clone = type(self)()
+        clone._copy_from(self)
+        return clone
+
+    def iteritems(self):
+        """Iterate over all header lines, including duplicate ones."""
+        for key in self:
+            vals = _dict_getitem(self, key)
+            for val in vals[1:]:
+                yield vals[0], val
+
+    def itermerged(self):
+        """Iterate over all headers, merging duplicate ones together."""
+        for key in self:
+            val = _dict_getitem(self, key)
+            yield val[0], ', '.join(val[1:])
+
+    def items(self):
+        return list(self.iteritems())
+
+    @classmethod
+    def from_httplib(cls, message):  # Python 2
+        """Read headers from a Python 2 httplib message object."""
+        # python2.7 does not expose a proper API for exporting multiheaders
+        # efficiently. This function re-reads raw lines from the message
+        # object and extracts the multiheaders properly.
+        headers = []
+
+        for line in message.headers:
+            if line.startswith((' ', '\t')):
+                key, value = headers[-1]
+                headers[-1] = (key, value + '\r\n' + line.rstrip())
+                continue
+
+            key, value = line.split(':', 1)
+            headers.append((key, value.strip()))
+
+        return cls(headers)
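The rewritten ``HTTPHeaderDict`` keeps the first-seen casing and folds duplicates on access; the expected behavior, per the implementation above:

    >>> h = HTTPHeaderDict()
    >>> h.add('Set-Cookie', 'foo=bar')
    >>> h.add('set-cookie', 'baz=quxx')
    >>> h['SET-COOKIE']
    'foo=bar, baz=quxx'
    >>> h.getlist('set-cookie')
    ['foo=bar', 'baz=quxx']
    >>> list(h.iteritems())
    [('Set-Cookie', 'foo=bar'), ('Set-Cookie', 'baz=quxx')]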
diff --git a/urllib3/connection.py b/urllib3/connection.py
index e5de769..2a8c359 100644
--- a/urllib3/connection.py
+++ b/urllib3/connection.py
@@ -260,3 +260,5 @@ if ssl:
     # Make a copy for testing.
     UnverifiedHTTPSConnection = HTTPSConnection
     HTTPSConnection = VerifiedHTTPSConnection
+else:
+    HTTPSConnection = DummyConnection
diff --git a/urllib3/connectionpool.py b/urllib3/connectionpool.py
index 8bdf228..117269a 100644
--- a/urllib3/connectionpool.py
+++ b/urllib3/connectionpool.py
@@ -72,6 +72,21 @@ class ConnectionPool(object):
         return '%s(host=%r, port=%r)' % (type(self).__name__,
                                          self.host, self.port)
 
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.close()
+        # Return False to re-raise any potential exceptions
+        return False
+
+    def close(self):
+        """
+        Close all pooled connections and disable the pool.
+        """
+        pass
+
+
 # This is taken from http://hg.python.org/cpython/file/7aaba721ebc0/Lib/socket.py#l252
 _blocking_errnos = set([errno.EAGAIN, errno.EWOULDBLOCK])
 
@@ -353,7 +368,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
 
         # Receive the response from the server
         try:
-            try: # Python 2.7+, use buffering of HTTP responses
+            try:  # Python 2.7, use buffering of HTTP responses
                 httplib_response = conn.getresponse(buffering=True)
             except TypeError:  # Python 2.6 and older
                 httplib_response = conn.getresponse()
@@ -558,6 +573,14 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
                 conn = None
             raise SSLError(e)
 
+        except SSLError:
+            # Treat SSLError separately from BaseSSLError to preserve
+            # traceback.
+            if conn:
+                conn.close()
+                conn = None
+            raise
+
         except (TimeoutError, HTTPException, SocketError, ConnectionError) as e:
             if conn:
                 # Discard the connection for these exceptions. It will be
@@ -565,14 +588,13 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
                 conn.close()
                 conn = None
 
-            stacktrace = sys.exc_info()[2]
             if isinstance(e, SocketError) and self.proxy:
                 e = ProxyError('Cannot connect to proxy.', e)
             elif isinstance(e, (SocketError, HTTPException)):
                 e = ProtocolError('Connection aborted.', e)
 
-            retries = retries.increment(method, url, error=e,
-                                        _pool=self, _stacktrace=stacktrace)
+            retries = retries.increment(method, url, error=e, _pool=self,
+                                        _stacktrace=sys.exc_info()[2])
             retries.sleep()
 
             # Keep track of the error for the retry warning.
@@ -713,7 +735,6 @@ class HTTPSConnectionPool(HTTPConnectionPool):
                  % (self.num_connections, self.host))
 
         if not self.ConnectionCls or self.ConnectionCls is DummyConnection:
-            # Platform-specific: Python without ssl
             raise SSLError("Can't connect to HTTPS URL because the SSL "
                            "module is not available.")
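With ``__enter__``/``__exit__`` in place, a pool can scope its connections to a block; a sketch (network access and the host are assumed):

    >>> from urllib3 import HTTPConnectionPool
    >>> with HTTPConnectionPool('example.com', maxsize=1) as pool:
    ...     r = pool.request('GET', '/')
    >>> # on exit, close() disposes of the pool; exceptions still propagate,
    >>> # since __exit__ returns False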
diff --git a/urllib3/contrib/pyopenssl.py b/urllib3/contrib/pyopenssl.py
index ee657fb..b2c34a8 100644
--- a/urllib3/contrib/pyopenssl.py
+++ b/urllib3/contrib/pyopenssl.py
@@ -38,8 +38,6 @@ Module Variables
 ----------------
 
 :var DEFAULT_SSL_CIPHER_LIST: The list of supported SSL/TLS cipher suites.
-    Default: ``ECDH+AESGCM:DH+AESGCM:ECDH+AES256:DH+AES256:ECDH+AES128:DH+AES:
-    ECDH+3DES:DH+3DES:RSA+AESGCM:RSA+AES:RSA+3DES:!aNULL:!MD5:!DSS``
 
 .. _sni: https://en.wikipedia.org/wiki/Server_Name_Indication
 .. _crime attack: https://en.wikipedia.org/wiki/CRIME_(security_exploit)
@@ -85,22 +83,7 @@ _openssl_verify = {
                                 + OpenSSL.SSL.VERIFY_FAIL_IF_NO_PEER_CERT,
 }
 
-# A secure default.
-# Sources for more information on TLS ciphers:
-#
-# - https://wiki.mozilla.org/Security/Server_Side_TLS
-# - https://www.ssllabs.com/projects/best-practices/index.html
-# - https://hynek.me/articles/hardening-your-web-servers-ssl-ciphers/
-#
-# The general intent is:
-# - Prefer cipher suites that offer perfect forward secrecy (DHE/ECDHE),
-# - prefer ECDHE over DHE for better performance,
-# - prefer any AES-GCM over any AES-CBC for better performance and security,
-# - use 3DES as fallback which is secure but slow,
-# - disable NULL authentication, MD5 MACs and DSS for security reasons.
-DEFAULT_SSL_CIPHER_LIST = "ECDH+AESGCM:DH+AESGCM:ECDH+AES256:DH+AES256:" + \
-        "ECDH+AES128:DH+AES:ECDH+3DES:DH+3DES:RSA+AESGCM:RSA+AES:RSA+3DES:" + \
-        "!aNULL:!MD5:!DSS"
+DEFAULT_SSL_CIPHER_LIST = util.ssl_.DEFAULT_CIPHERS
 
 orig_util_HAS_SNI = util.HAS_SNI
@@ -299,7 +282,9 @@ def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None,
         try:
             cnx.do_handshake()
         except OpenSSL.SSL.WantReadError:
-            select.select([sock], [], [])
+            rd, _, _ = select.select([sock], [], [], sock.gettimeout())
+            if not rd:
+                raise timeout('select timed out')
             continue
         except OpenSSL.SSL.Error as e:
             raise ssl.SSLError('bad handshake', e)
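The contrib module now shares its cipher list with ``urllib3.util.ssl_`` instead of carrying its own copy, and the handshake wait is bounded by the socket's timeout. Activating the module is unchanged; a sketch:

    >>> import urllib3.contrib.pyopenssl
    >>> urllib3.contrib.pyopenssl.inject_into_urllib3()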
diff --git a/urllib3/exceptions.py b/urllib3/exceptions.py
index 0c6fd3c..31bda1c 100644
--- a/urllib3/exceptions.py
+++ b/urllib3/exceptions.py
@@ -157,3 +157,13 @@ class InsecureRequestWarning(SecurityWarning):
 class SystemTimeWarning(SecurityWarning):
     "Warned when system time is suspected to be wrong"
     pass
+
+
+class InsecurePlatformWarning(SecurityWarning):
+    "Warned when certain SSL configuration is not available on a platform."
+    pass
+
+
+class ResponseNotChunked(ProtocolError, ValueError):
+    "Response needs to be chunked in order to read it as chunks."
+    pass
diff --git a/urllib3/poolmanager.py b/urllib3/poolmanager.py
index 515dc96..b8d1e74 100644
--- a/urllib3/poolmanager.py
+++ b/urllib3/poolmanager.py
@@ -8,7 +8,7 @@ except ImportError:
 from ._collections import RecentlyUsedContainer
 from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool
 from .connectionpool import port_by_scheme
-from .exceptions import LocationValueError
+from .exceptions import LocationValueError, MaxRetryError
 from .request import RequestMethods
 from .util.url import parse_url
 from .util.retry import Retry
@@ -64,6 +64,14 @@ class PoolManager(RequestMethods):
         self.pools = RecentlyUsedContainer(num_pools,
                                            dispose_func=lambda p: p.close())
 
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.clear()
+        # Return False to re-raise any potential exceptions
+        return False
+
     def _new_pool(self, scheme, host, port):
         """
         Create a new :class:`ConnectionPool` based on host, port and scheme.
@@ -167,7 +175,14 @@ class PoolManager(RequestMethods):
         if not isinstance(retries, Retry):
             retries = Retry.from_int(retries, redirect=redirect)
 
-        kw['retries'] = retries.increment(method, redirect_location)
+        try:
+            retries = retries.increment(method, url, response=response, _pool=conn)
+        except MaxRetryError:
+            if retries.raise_on_redirect:
+                raise
+            return response
+
+        kw['retries'] = retries
         kw['redirect'] = redirect
 
         log.info("Redirecting %s -> %s" % (url, redirect_location))
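``PoolManager`` gets the same context-manager treatment, and its redirect handling now honors the ``Retry`` settings (the fix for Issue #553). A sketch:

    >>> from urllib3 import PoolManager
    >>> with PoolManager(num_pools=2) as http:
    ...     r = http.request('GET', 'http://example.com/')
    >>> # __exit__ calls clear(), disposing of all cached pools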
diff --git a/urllib3/response.py b/urllib3/response.py
index e69de95..24140c4 100644
--- a/urllib3/response.py
+++ b/urllib3/response.py
@@ -1,15 +1,20 @@
+try:
+    import http.client as httplib
+except ImportError:
+    import httplib
 import zlib
 import io
 from socket import timeout as SocketTimeout
 
 from ._collections import HTTPHeaderDict
-from .exceptions import ProtocolError, DecodeError, ReadTimeoutError
-from .packages.six import string_types as basestring, binary_type
+from .exceptions import (
+    ProtocolError, DecodeError, ReadTimeoutError, ResponseNotChunked
+)
+from .packages.six import string_types as basestring, binary_type, PY3
 from .connection import HTTPException, BaseSSLError
 from .util.response import is_fp_closed
 
-
 class DeflateDecoder(object):
 
     def __init__(self):
@@ -21,6 +26,9 @@ class DeflateDecoder(object):
         return getattr(self._obj, name)
 
     def decompress(self, data):
+        if not data:
+            return data
+
         if not self._first_try:
             return self._obj.decompress(data)
 
@@ -36,9 +44,23 @@ class DeflateDecoder(object):
             self._data = None
 
 
+class GzipDecoder(object):
+
+    def __init__(self):
+        self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
+
+    def __getattr__(self, name):
+        return getattr(self._obj, name)
+
+    def decompress(self, data):
+        if not data:
+            return data
+        return self._obj.decompress(data)
+
+
 def _get_decoder(mode):
     if mode == 'gzip':
-        return zlib.decompressobj(16 + zlib.MAX_WBITS)
+        return GzipDecoder()
 
     return DeflateDecoder()
 
@@ -76,9 +98,10 @@ class HTTPResponse(io.IOBase):
                  strict=0, preload_content=True, decode_content=True,
                  original_response=None, pool=None, connection=None):
 
-        self.headers = HTTPHeaderDict()
-        if headers:
-            self.headers.update(headers)
+        if isinstance(headers, HTTPHeaderDict):
+            self.headers = headers
+        else:
+            self.headers = HTTPHeaderDict(headers)
         self.status = status
         self.version = version
         self.reason = reason
@@ -100,7 +123,17 @@ class HTTPResponse(io.IOBase):
         if hasattr(body, 'read'):
             self._fp = body
 
-        if preload_content and not self._body:
+        # Are we using the chunked-style of transfer encoding?
+        self.chunked = False
+        self.chunk_left = None
+        tr_enc = self.headers.get('transfer-encoding', '').lower()
+        # Don't incur the penalty of creating a list and then discarding it
+        encodings = (enc.strip() for enc in tr_enc.split(","))
+        if "chunked" in encodings:
+            self.chunked = True
+
+        # We certainly don't want to preload content when the response is chunked.
+        if not self.chunked and preload_content and not self._body:
             self._body = self.read(decode_content=decode_content)
 
     def get_redirect_location(self):
@@ -140,6 +173,35 @@ class HTTPResponse(io.IOBase):
         """
         return self._fp_bytes_read
 
+    def _init_decoder(self):
+        """
+        Set up the _decoder attribute if necessary.
+        """
+        # Note: content-encoding value should be case-insensitive, per RFC 7230
+        # Section 3.2
+        content_encoding = self.headers.get('content-encoding', '').lower()
+        if self._decoder is None and content_encoding in self.CONTENT_DECODERS:
+            self._decoder = _get_decoder(content_encoding)
+
+    def _decode(self, data, decode_content, flush_decoder):
+        """
+        Decode the data passed in and potentially flush the decoder.
+        """
+        try:
+            if decode_content and self._decoder:
+                data = self._decoder.decompress(data)
+        except (IOError, zlib.error) as e:
+            content_encoding = self.headers.get('content-encoding', '').lower()
+            raise DecodeError(
+                "Received response with content-encoding: %s, but "
+                "failed to decode it." % content_encoding, e)
+
+        if flush_decoder and decode_content and self._decoder:
+            buf = self._decoder.decompress(binary_type())
+            data += buf + self._decoder.flush()
+
+        return data
+
     def read(self, amt=None, decode_content=None, cache_content=False):
         """
         Similar to :meth:`httplib.HTTPResponse.read`, but with two additional
@@ -161,12 +223,7 @@ class HTTPResponse(io.IOBase):
             after having ``.read()`` the file object. (Overridden if ``amt`` is
             set.)
         """
-        # Note: content-encoding value should be case-insensitive, per RFC 7230
-        # Section 3.2
-        content_encoding = self.headers.get('content-encoding', '').lower()
-        if self._decoder is None:
-            if content_encoding in self.CONTENT_DECODERS:
-                self._decoder = _get_decoder(content_encoding)
+        self._init_decoder()
         if decode_content is None:
             decode_content = self.decode_content
 
@@ -202,7 +259,7 @@ class HTTPResponse(io.IOBase):
 
             except BaseSSLError as e:
                 # FIXME: Is there a better way to differentiate between SSLErrors?
-                if not 'read operation timed out' in str(e):  # Defensive:
+                if 'read operation timed out' not in str(e):  # Defensive:
                     # This shouldn't happen but just in case we're missing an edge
                     # case, let's avoid swallowing SSL errors.
                     raise
@@ -215,17 +272,7 @@ class HTTPResponse(io.IOBase):
 
             self._fp_bytes_read += len(data)
 
-            try:
-                if decode_content and self._decoder:
-                    data = self._decoder.decompress(data)
-            except (IOError, zlib.error) as e:
-                raise DecodeError(
-                    "Received response with content-encoding: %s, but "
-                    "failed to decode it." % content_encoding, e)
-
-            if flush_decoder and decode_content and self._decoder:
-                buf = self._decoder.decompress(binary_type())
-                data += buf + self._decoder.flush()
+            data = self._decode(data, decode_content, flush_decoder)
 
             if cache_content:
                 self._body = data
@@ -252,11 +299,15 @@ class HTTPResponse(io.IOBase):
             If True, will attempt to decode the body based on the
             'content-encoding' header.
         """
-        while not is_fp_closed(self._fp):
-            data = self.read(amt=amt, decode_content=decode_content)
+        if self.chunked:
+            for line in self.read_chunked(amt, decode_content=decode_content):
+                yield line
+        else:
+            while not is_fp_closed(self._fp):
+                data = self.read(amt=amt, decode_content=decode_content)
 
-            if data:
-                yield data
+                if data:
+                    yield data
 
     @classmethod
     def from_httplib(ResponseCls, r, **response_kw):
         """
         Given an :class:`httplib.HTTPResponse` instance ``r``, return a
         corresponding :class:`urllib3.response.HTTPResponse` object.
 
         Remaining parameters are passed to the HTTPResponse constructor, along
         with ``original_response=r``.
         """
-
-        headers = HTTPHeaderDict()
-        for k, v in r.getheaders():
-            headers.add(k, v)
+        headers = r.msg
+        if not isinstance(headers, HTTPHeaderDict):
+            if PY3:  # Python 3
+                headers = HTTPHeaderDict(headers.items())
+            else:  # Python 2
+                headers = HTTPHeaderDict.from_httplib(headers)
 
         # HTTPResponse objects in Python 3 don't have a .strict attribute
         strict = getattr(r, 'strict', 0)
-        return ResponseCls(body=r,
+        resp = ResponseCls(body=r,
                            headers=headers,
                            status=r.status,
                            version=r.version,
@@ -282,6 +335,7 @@ class HTTPResponse(io.IOBase):
                            strict=strict,
                            original_response=r,
                            **response_kw)
+        return resp
 
     # Backwards-compatibility methods for httplib.HTTPResponse
     def getheaders(self):
@@ -331,3 +385,82 @@ class HTTPResponse(io.IOBase):
         else:
             b[:len(temp)] = temp
             return len(temp)
+
+    def _update_chunk_length(self):
+        # First, we'll figure out length of a chunk and then
+        # we'll try to read it from socket.
+        if self.chunk_left is not None:
+            return
+        line = self._fp.fp.readline()
+        line = line.split(b';', 1)[0]
+        try:
+            self.chunk_left = int(line, 16)
+        except ValueError:
+            # Invalid chunked protocol response, abort.
+            self.close()
+            raise httplib.IncompleteRead(line)
+
+    def _handle_chunk(self, amt):
+        returned_chunk = None
+        if amt is None:
+            chunk = self._fp._safe_read(self.chunk_left)
+            returned_chunk = chunk
+            self._fp._safe_read(2)  # Toss the CRLF at the end of the chunk.
+            self.chunk_left = None
+        elif amt < self.chunk_left:
+            value = self._fp._safe_read(amt)
+            self.chunk_left = self.chunk_left - amt
+            returned_chunk = value
+        elif amt == self.chunk_left:
+            value = self._fp._safe_read(amt)
+            self._fp._safe_read(2)  # Toss the CRLF at the end of the chunk.
+            self.chunk_left = None
+            returned_chunk = value
+        else:  # amt > self.chunk_left
+            returned_chunk = self._fp._safe_read(self.chunk_left)
+            self._fp._safe_read(2)  # Toss the CRLF at the end of the chunk.
+            self.chunk_left = None
+        return returned_chunk
+
+    def read_chunked(self, amt=None, decode_content=None):
+        """
+        Similar to :meth:`HTTPResponse.read`, but with an additional
+        parameter: ``decode_content``.
+
+        :param decode_content:
+            If True, will attempt to decode the body based on the
+            'content-encoding' header.
+        """
+        self._init_decoder()
+        # FIXME: Rewrite this method and make it a class with a better structured logic.
+        if not self.chunked:
+            raise ResponseNotChunked("Response is not chunked. "
+                                     "Header 'transfer-encoding: chunked' is missing.")
+
+        if self._original_response and self._original_response._method.upper() == 'HEAD':
+            # Don't bother reading the body of a HEAD request.
+            # FIXME: Can we do this somehow without accessing private httplib _method?
+            self._original_response.close()
+            return
+
+        while True:
+            self._update_chunk_length()
+            if self.chunk_left == 0:
+                break
+            chunk = self._handle_chunk(amt)
+            yield self._decode(chunk, decode_content=decode_content,
+                               flush_decoder=True)
+
+        # Chunk content ends with \r\n: discard it.
+        while True:
+            line = self._fp.fp.readline()
+            if not line:
+                # Some sites may not end with '\r\n'.
+                break
+            if line == b'\r\n':
+                break
+
+        # We read everything; close the "file".
+        if self._original_response:
+            self._original_response.close()
+        self.release_conn()
diff --git a/urllib3/util/connection.py b/urllib3/util/connection.py
index 2156993..859aec6 100644
--- a/urllib3/util/connection.py
+++ b/urllib3/util/connection.py
@@ -82,6 +82,7 @@ def create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
             err = _
             if sock is not None:
                 sock.close()
+                sock = None
 
     if err is not None:
         raise err
diff --git a/urllib3/util/ssl_.py b/urllib3/util/ssl_.py
index a788b1b..b846d42 100644
--- a/urllib3/util/ssl_.py
+++ b/urllib3/util/ssl_.py
@@ -1,7 +1,7 @@
 from binascii import hexlify, unhexlify
-from hashlib import md5, sha1
+from hashlib import md5, sha1, sha256
 
-from ..exceptions import SSLError
+from ..exceptions import SSLError, InsecurePlatformWarning
 
 
 SSLContext = None
@@ -9,9 +9,10 @@ HAS_SNI = False
 create_default_context = None
 
 import errno
-import ssl
+import warnings
 
 try:  # Test for SSL features
+    import ssl
     from ssl import wrap_socket, CERT_NONE, PROTOCOL_SSLv23
     from ssl import HAS_SNI  # Has SNI?
 except ImportError:
@@ -24,14 +25,24 @@ except ImportError:
     OP_NO_SSLv2, OP_NO_SSLv3 = 0x1000000, 0x2000000
     OP_NO_COMPRESSION = 0x20000
 
-try:
-    from ssl import _DEFAULT_CIPHERS
-except ImportError:
-    _DEFAULT_CIPHERS = (
-        'ECDH+AESGCM:DH+AESGCM:ECDH+AES256:DH+AES256:ECDH+AES128:DH+AES:ECDH+HIGH:'
-        'DH+HIGH:ECDH+3DES:DH+3DES:RSA+AESGCM:RSA+AES:RSA+HIGH:RSA+3DES:ECDH+RC4:'
-        'DH+RC4:RSA+RC4:!aNULL:!eNULL:!MD5'
-    )
+# A secure default.
+# Sources for more information on TLS ciphers:
+#
+# - https://wiki.mozilla.org/Security/Server_Side_TLS
+# - https://www.ssllabs.com/projects/best-practices/index.html
+# - https://hynek.me/articles/hardening-your-web-servers-ssl-ciphers/
+#
+# The general intent is:
+# - Prefer cipher suites that offer perfect forward secrecy (DHE/ECDHE),
+# - prefer ECDHE over DHE for better performance,
+# - prefer any AES-GCM over any AES-CBC for better performance and security,
+# - use 3DES as fallback which is secure but slow,
+# - disable NULL authentication, MD5 MACs and DSS for security reasons.
+DEFAULT_CIPHERS = (
+    'ECDH+AESGCM:DH+AESGCM:ECDH+AES256:DH+AES256:ECDH+AES128:DH+AES:ECDH+HIGH:'
+    'DH+HIGH:ECDH+3DES:DH+3DES:RSA+AESGCM:RSA+AES:RSA+HIGH:RSA+3DES:!aNULL:'
+    '!eNULL:!MD5'
+)
 
 try:
     from ssl import SSLContext  # Modern SSL?
@@ -39,7 +50,8 @@ except ImportError:
     import sys
 
     class SSLContext(object):  # Platform-specific: Python 2 & 3.1
-        supports_set_ciphers = sys.version_info >= (2, 7)
+        supports_set_ciphers = ((2, 7) <= sys.version_info < (3,) or
+                                (3, 2) <= sys.version_info)
 
         def __init__(self, protocol_version):
             self.protocol = protocol_version
@@ -69,6 +81,14 @@ except ImportError:
             self.ciphers = cipher_suite
 
         def wrap_socket(self, socket, server_hostname=None):
+            warnings.warn(
+                'A true SSLContext object is not available. This prevents '
+                'urllib3 from configuring SSL appropriately and may cause '
+                'certain SSL connections to fail. For more information, see '
+                'https://urllib3.readthedocs.org/en/latest/security.html'
+                '#insecureplatformwarning.',
+                InsecurePlatformWarning
+            )
             kwargs = {
                 'keyfile': self.keyfile,
                 'certfile': self.certfile,
@@ -96,7 +116,8 @@ def assert_fingerprint(cert, fingerprint):
     # this digest.
     hashfunc_map = {
        16: md5,
-        20: sha1
+        20: sha1,
+        32: sha256,
    }
 
     fingerprint = fingerprint.replace(':', '').lower()
@@ -157,7 +178,7 @@ def resolve_ssl_version(candidate):
     return candidate
 
 
-def create_urllib3_context(ssl_version=None, cert_reqs=ssl.CERT_REQUIRED,
+def create_urllib3_context(ssl_version=None, cert_reqs=None,
                            options=None, ciphers=None):
     """All arguments have the same meaning as ``ssl_wrap_socket``.
 
@@ -194,6 +215,9 @@ def create_urllib3_context(ssl_version=None, cert_reqs=ssl.CERT_REQUIRED,
     """
     context = SSLContext(ssl_version or ssl.PROTOCOL_SSLv23)
 
+    # Setting the default here, as we may have no ssl module on import
+    cert_reqs = ssl.CERT_REQUIRED if cert_reqs is None else cert_reqs
+
     if options is None:
         options = 0
         # SSLv2 is easily broken and is considered harmful and dangerous
@@ -207,11 +231,13 @@ def create_urllib3_context(ssl_version=None, cert_reqs=ssl.CERT_REQUIRED,
     context.options |= options
 
     if getattr(context, 'supports_set_ciphers', True):  # Platform-specific: Python 2.6
-        context.set_ciphers(ciphers or _DEFAULT_CIPHERS)
+        context.set_ciphers(ciphers or DEFAULT_CIPHERS)
 
     context.verify_mode = cert_reqs
     if getattr(context, 'check_hostname', None) is not None:  # Platform-specific: Python 3.2
-        context.check_hostname = (context.verify_mode == ssl.CERT_REQUIRED)
+        # We do our own verification, including fingerprints and alternative
+        # hostnames. So disable it here
+        context.check_hostname = False
    return context
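With the 32-byte entry in ``hashfunc_map``, SHA-256 pins now verify alongside MD5 and SHA-1. A sketch with a placeholder digest (64 hex characters; not a real certificate's fingerprint):

    >>> from urllib3 import HTTPSConnectionPool
    >>> placeholder = ':'.join(['AB'] * 32)  # hypothetical SHA-256 pin
    >>> pool = HTTPSConnectionPool('example.com', 443,
    ...                            assert_fingerprint=placeholder)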
diff --git a/urllib3/util/url.py b/urllib3/util/url.py
index b2ec834..e58050c 100644
--- a/urllib3/util/url.py
+++ b/urllib3/util/url.py
@@ -15,6 +15,8 @@ class Url(namedtuple('Url', url_attrs)):
 
     def __new__(cls, scheme=None, auth=None, host=None, port=None, path=None,
                 query=None, fragment=None):
+        if path and not path.startswith('/'):
+            path = '/' + path
         return super(Url, cls).__new__(cls, scheme, auth, host, port, path,
                                        query, fragment)
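The net effect of the ``__new__`` change is that relative paths gain a leading slash as the tuple is built; assuming the ``Url.url`` property behaves as in this release:

    >>> from urllib3.util.url import Url
    >>> Url(scheme='http', host='example.com', path='foo').url
    'http://example.com/foo'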