31 files changed, 1312 insertions, 538 deletions
diff --git a/CHANGES.rst b/CHANGES.rst
index 891fd79..ae63682 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -1,10 +1,62 @@
 Changes
 =======
 
+1.8 (2014-03-04)
+++++++++++++++++
+
+* Improved url parsing in ``urllib3.util.parse_url`` (properly parse '@' in
+  username, and blank ports like 'hostname:').
+
+* New ``urllib3.connection`` module which contains all the HTTPConnection
+  objects.
+
+* Several ``urllib3.util.Timeout``-related fixes. Also changed constructor
+  signature to a more sensible order. [Backwards incompatible]
+  (Issues #252, #262, #263)
+
+* Use ``backports.ssl_match_hostname`` if it's installed. (Issue #274)
+
+* Added ``.tell()`` method to ``urllib3.response.HTTPResponse`` which
+  returns the number of bytes read so far. (Issue #277)
+
+* Support for platforms without threading. (Issue #289)
+
+* Expand default-port comparison in ``HTTPConnectionPool.is_same_host``
+  to allow a pool with no specified port to be considered equal to an
+  HTTP/HTTPS url with port 80/443 explicitly provided. (Issue #305)
+
+* Improved default SSL/TLS settings to avoid vulnerabilities.
+  (Issue #309)
+
+* Fixed ``urllib3.poolmanager.ProxyManager`` not retrying on connect errors.
+  (Issue #310)
+
+* Disable Nagle's Algorithm on the socket for non-proxies. A subset of requests
+  will send the entire HTTP request ~200 milliseconds faster; however, some of
+  the resulting TCP packets will be smaller. (Issue #254)
+
+* Increased maximum number of SubjectAltNames in ``urllib3.contrib.pyopenssl``
+  from the default 64 to 1024 in a single certificate. (Issue #318)
+
+* Headers are now passed and stored as a custom
+  ``urllib3.collections_.HTTPHeaderDict`` object rather than a plain ``dict``.
+  (Issue #329, #333)
+
+* Headers no longer lose their case on Python 3. (Issue #236)
+
+* ``urllib3.contrib.pyopenssl`` now uses the operating system's default CA
+  certificates on inject. (Issue #332)
+
+* Requests with ``retries=False`` will immediately raise any exceptions without
+  wrapping them in ``MaxRetryError``. (Issue #348)
+
+* Fixed open socket leak with SSL-related failures. (Issue #344, #348)
+
+
 1.7.1 (2013-09-25)
 ++++++++++++++++++
 
-* Added granular timeout support with new `urllib3.util.Timeout` class.
+* Added granular timeout support with new ``urllib3.util.Timeout`` class.
   (Issue #231)
 
 * Fixed Python 3.4 support. (Issue #238)
diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt
index e1aca42..e2dba35 100644
--- a/CONTRIBUTORS.txt
+++ b/CONTRIBUTORS.txt
@@ -90,5 +90,26 @@ In chronological order:
 * Kevin Burke <kev@inburke.com> and Pavel Kirichenko <juanych@yandex-team.ru>
   * Support for separate connect and request timeouts
 
+* Peter Waller <p@pwaller.net>
+  * HTTPResponse.tell() for determining amount received over the wire
+
+* Nipunn Koorapati <nipunn1313@gmail.com>
+  * Ignore default ports when comparing hosts for equality
+
+* Danilo @dbrgn <http://dbrgn.ch/>
+  * Disabled TLS compression by default on Python 3.2+
+  * Disabled TLS compression in pyopenssl contrib module
+  * Configurable cipher suites in pyopenssl contrib module
+
+* Roman Bogorodskiy <roman.bogorodskiy@ericsson.com>
+  * Account retries on proxy errors
+
+* Nicolas Delaby <nicolas.delaby@ezeep.com>
+  * Use the platform-specific CA certificate locations
+
+* Josh Schneier <https://github.com/jschneier>
+  * HTTPHeaderDict and associated tests and docs
+  * Bugfixes, docs, test coverage
+
 * [Your name or handle] <[email or website]>
   * [Brief summary of your changes]
diff --git a/PKG-INFO b/PKG-INFO
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
-Metadata-Version: 1.0
+Metadata-Version: 1.1
 Name: urllib3
-Version: 1.7.1
+Version: 1.8
 Summary: HTTP library with thread-safe connection pooling, file post, and more.
 Home-page: http://urllib3.readthedocs.org/
 Author: Andrey Petrov
@@ -121,10 +121,62 @@ Description: =======
         Changes
         =======
 
+        1.8 (2014-03-04)
+        ++++++++++++++++
+
+        * Improved url parsing in ``urllib3.util.parse_url`` (properly parse '@' in
+          username, and blank ports like 'hostname:').
+
+        * New ``urllib3.connection`` module which contains all the HTTPConnection
+          objects.
+
+        * Several ``urllib3.util.Timeout``-related fixes. Also changed constructor
+          signature to a more sensible order. [Backwards incompatible]
+          (Issues #252, #262, #263)
+
+        * Use ``backports.ssl_match_hostname`` if it's installed. (Issue #274)
+
+        * Added ``.tell()`` method to ``urllib3.response.HTTPResponse`` which
+          returns the number of bytes read so far. (Issue #277)
+
+        * Support for platforms without threading. (Issue #289)
+
+        * Expand default-port comparison in ``HTTPConnectionPool.is_same_host``
+          to allow a pool with no specified port to be considered equal to an
+          HTTP/HTTPS url with port 80/443 explicitly provided. (Issue #305)
+
+        * Improved default SSL/TLS settings to avoid vulnerabilities.
+          (Issue #309)
+
+        * Fixed ``urllib3.poolmanager.ProxyManager`` not retrying on connect errors.
+          (Issue #310)
+
+        * Disable Nagle's Algorithm on the socket for non-proxies. A subset of requests
+          will send the entire HTTP request ~200 milliseconds faster; however, some of
+          the resulting TCP packets will be smaller. (Issue #254)
+
+        * Increased maximum number of SubjectAltNames in ``urllib3.contrib.pyopenssl``
+          from the default 64 to 1024 in a single certificate. (Issue #318)
+
+        * Headers are now passed and stored as a custom
+          ``urllib3.collections_.HTTPHeaderDict`` object rather than a plain ``dict``.
+          (Issue #329, #333)
+
+        * Headers no longer lose their case on Python 3. (Issue #236)
+
+        * ``urllib3.contrib.pyopenssl`` now uses the operating system's default CA
+          certificates on inject. (Issue #332)
+
+        * Requests with ``retries=False`` will immediately raise any exceptions without
+          wrapping them in ``MaxRetryError``. (Issue #348)
+
+        * Fixed open socket leak with SSL-related failures. (Issue #344, #348)
+
+
         1.7.1 (2013-09-25)
         ++++++++++++++++++
 
-        * Added granular timeout support with new `urllib3.util.Timeout` class.
+        * Added granular timeout support with new ``urllib3.util.Timeout`` class.
           (Issue #231)
 
         * Fixed Python 3.4 support. (Issue #238)
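The ``urllib3.util.Timeout`` change above is the release's only backwards-incompatible API change, so a quick illustration may help. A minimal sketch of the new constructor order, based on the values exercised in test/test_util.py further down; the host is a placeholder:

    from urllib3 import HTTPConnectionPool
    from urllib3.util import Timeout

    # 'total' is now the first positional argument: Timeout(5) means total=5.
    assert Timeout(5).total == 5
    timeout = Timeout(connect=1.0, read=2.0, total=3.0)

    # example.com is an illustrative host, not part of the patch.
    pool = HTTPConnectionPool('example.com', timeout=timeout)
    response = pool.request('GET', '/')
    print(response.status)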
diff --git a/dummyserver/server.py b/dummyserver/server.py
index f4f98a4..22de456 100755
--- a/dummyserver/server.py
+++ b/dummyserver/server.py
@@ -5,21 +5,21 @@ Dummy server used for unit testing.
 """
 from __future__ import print_function
 
+import errno
 import logging
 import os
+import random
+import string
 import sys
 import threading
 import socket
 
-from tornado import netutil
+from tornado.platform.auto import set_close_exec
 import tornado.wsgi
 import tornado.httpserver
 import tornado.ioloop
 import tornado.web
 
-from dummyserver.handlers import TestingApp
-from dummyserver.proxy import ProxyHandler
-
 log = logging.getLogger(__name__)
 
@@ -51,7 +51,7 @@ class SocketServerThread(threading.Thread):
         self.ready_event = ready_event
 
     def _start_server(self):
-        sock = socket.socket()
+        sock = socket.socket(socket.AF_INET6)
         if sys.platform != 'win32':
             sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
         sock.bind((self.host, 0))
@@ -70,59 +70,112 @@ class SocketServerThread(threading.Thread):
         self.server = self._start_server()
 
 
-class TornadoServerThread(threading.Thread):
-    app = tornado.wsgi.WSGIContainer(TestingApp())
+# FIXME: there is a pull request patching bind_sockets in Tornado directly.
+# If it gets merged and released we can drop this and use
+# `tornado.netutil.bind_sockets` again.
+# https://github.com/facebook/tornado/pull/977
 
-    def __init__(self, host='localhost', scheme='http', certs=None,
-                 ready_event=None):
-        threading.Thread.__init__(self)
+def bind_sockets(port, address=None, family=socket.AF_UNSPEC, backlog=128,
+                 flags=None):
+    """Creates listening sockets bound to the given port and address.
 
-        self.host = host
-        self.scheme = scheme
-        self.certs = certs
-        self.ready_event = ready_event
+    Returns a list of socket objects (multiple sockets are returned if
+    the given address maps to multiple IP addresses, which is most common
+    for mixed IPv4 and IPv6 use).
 
-    def _start_server(self):
-        if self.scheme == 'https':
-            http_server = tornado.httpserver.HTTPServer(self.app,
-                                                         ssl_options=self.certs)
-        else:
-            http_server = tornado.httpserver.HTTPServer(self.app)
+    Address may be either an IP address or hostname. If it's a hostname,
+    the server will listen on all IP addresses associated with the
+    name. Address may be an empty string or None to listen on all
+    available interfaces. Family may be set to either `socket.AF_INET`
+    or `socket.AF_INET6` to restrict to IPv4 or IPv6 addresses, otherwise
+    both will be used if available.
 
-        family = socket.AF_INET6 if ':' in self.host else socket.AF_INET
-        sock, = netutil.bind_sockets(None, address=self.host, family=family)
-        self.port = sock.getsockname()[1]
-        http_server.add_sockets([sock])
-        return http_server
+    The ``backlog`` argument has the same meaning as for
+    `socket.listen() <socket.socket.listen>`.
 
-    def run(self):
-        self.ioloop = tornado.ioloop.IOLoop.instance()
-        self.server = self._start_server()
-        if self.ready_event:
-            self.ready_event.set()
-        self.ioloop.start()
-
-    def stop(self):
-        self.ioloop.add_callback(self.server.stop)
-        self.ioloop.add_callback(self.ioloop.stop)
-
-
-class ProxyServerThread(TornadoServerThread):
-    app = tornado.web.Application([(r'.*', ProxyHandler)])
-
-
-if __name__ == '__main__':
-    log.setLevel(logging.DEBUG)
-    log.addHandler(logging.StreamHandler(sys.stderr))
-
-    from urllib3 import get_host
+    ``flags`` is a bitmask of AI_* flags to `~socket.getaddrinfo`, like
+    ``socket.AI_PASSIVE | socket.AI_NUMERICHOST``.
+    """
+    sockets = []
+    if address == "":
+        address = None
+    if not socket.has_ipv6 and family == socket.AF_UNSPEC:
+        # Python can be compiled with --disable-ipv6, which causes
+        # operations on AF_INET6 sockets to fail, but does not
+        # automatically exclude those results from getaddrinfo
+        # results.
+        # http://bugs.python.org/issue16208
+        family = socket.AF_INET
+    if flags is None:
+        flags = socket.AI_PASSIVE
+    binded_port = None
+    for res in set(socket.getaddrinfo(address, port, family,
+                                      socket.SOCK_STREAM, 0, flags)):
+        af, socktype, proto, canonname, sockaddr = res
+        try:
+            sock = socket.socket(af, socktype, proto)
+        except socket.error as e:
+            if e.args[0] == errno.EAFNOSUPPORT:
+                continue
+            raise
+        set_close_exec(sock.fileno())
+        if os.name != 'nt':
+            sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
+        if af == socket.AF_INET6:
+            # On linux, ipv6 sockets accept ipv4 too by default,
+            # but this makes it impossible to bind to both
+            # 0.0.0.0 in ipv4 and :: in ipv6. On other systems,
+            # separate sockets *must* be used to listen for both ipv4
+            # and ipv6. For consistency, always disable ipv4 on our
+            # ipv6 sockets and use a separate ipv4 socket when needed.
+            #
+            # Python 2.x on windows doesn't have IPPROTO_IPV6.
+            if hasattr(socket, "IPPROTO_IPV6"):
+                sock.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 1)
+
+        # automatic port allocation with port=None
+        # should bind on the same port on IPv4 and IPv6
+        host, requested_port = sockaddr[:2]
+        if requested_port == 0 and binded_port is not None:
+            sockaddr = tuple([host, binded_port] + list(sockaddr[2:]))
+
+        sock.setblocking(0)
+        sock.bind(sockaddr)
+        binded_port = sock.getsockname()[1]
+        sock.listen(backlog)
+        sockets.append(sock)
+    return sockets
+
+
+def run_tornado_app(app, io_loop, certs, scheme, host):
+    if scheme == 'https':
+        http_server = tornado.httpserver.HTTPServer(app, ssl_options=certs,
+                                                    io_loop=io_loop)
+    else:
+        http_server = tornado.httpserver.HTTPServer(app, io_loop=io_loop)
+
+    sockets = bind_sockets(None, address=host)
+    port = sockets[0].getsockname()[1]
+    http_server.add_sockets(sockets)
+    return http_server, port
+
+
+def run_loop_in_thread(io_loop):
+    t = threading.Thread(target=io_loop.start)
+    t.start()
+    return t
 
-    url = "http://localhost:8081"
-    if len(sys.argv) > 1:
-        url = sys.argv[1]
 
-    print("Starting WSGI server at: %s" % url)
+def get_unreachable_address():
+    while True:
+        host = ''.join(random.choice(string.ascii_lowercase)
+                       for _ in range(60))
+        sockaddr = (host, 54321)
 
-    scheme, host, port = get_host(url)
-    t = TornadoServerThread(scheme=scheme, host=host, port=port)
-    t.start()
+        # check if we are really "lucky" and hit an actual server
+        try:
+            s = socket.create_connection(sockaddr)
+        except socket.error:
+            return sockaddr
+        else:
+            s.close()
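The backported bind_sockets above differs from Tornado's mainly in reusing the first ephemeral port across address families. A usage sketch, assuming a checkout where the dummyserver package is importable and localhost resolves to both 127.0.0.1 and ::1:

    import socket
    from dummyserver.server import bind_sockets

    # port=None lets the OS pick an ephemeral port; the loop then reuses
    # that same port when binding the remaining address families.
    sockets = bind_sockets(None, address='localhost')
    ports = set(s.getsockname()[1] for s in sockets)
    assert len(ports) == 1  # IPv4 and IPv6 listeners share one port

    for s in sockets:
        print(s.family, s.getsockname())
        s.close()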
diff --git a/dummyserver/testcase.py b/dummyserver/testcase.py
index a2a1da1..35769ef 100644
--- a/dummyserver/testcase.py
+++ b/dummyserver/testcase.py
@@ -2,14 +2,17 @@ import unittest
 import socket
 import threading
 from nose.plugins.skip import SkipTest
+from tornado import ioloop, web, wsgi
 
 from dummyserver.server import (
-    TornadoServerThread, SocketServerThread,
+    SocketServerThread,
+    run_tornado_app,
+    run_loop_in_thread,
     DEFAULT_CERTS,
-    ProxyServerThread,
 )
+from dummyserver.handlers import TestingApp
+from dummyserver.proxy import ProxyHandler
 
-has_ipv6 = hasattr(socket, 'has_ipv6')
 
 class SocketDummyServerTestCase(unittest.TestCase):
@@ -33,7 +36,7 @@ class SocketDummyServerTestCase(unittest.TestCase):
     @classmethod
     def tearDownClass(cls):
         if hasattr(cls, 'server_thread'):
-            cls.server_thread.join()
+            cls.server_thread.join(0.1)
 
 
 class HTTPDummyServerTestCase(unittest.TestCase):
@@ -44,18 +47,16 @@ class HTTPDummyServerTestCase(unittest.TestCase):
 
     @classmethod
     def _start_server(cls):
-        ready_event = threading.Event()
-        cls.server_thread = TornadoServerThread(host=cls.host,
-                                                scheme=cls.scheme,
-                                                certs=cls.certs,
-                                                ready_event=ready_event)
-        cls.server_thread.start()
-        ready_event.wait()
-        cls.port = cls.server_thread.port
+        cls.io_loop = ioloop.IOLoop()
+        app = wsgi.WSGIContainer(TestingApp())
+        cls.server, cls.port = run_tornado_app(app, cls.io_loop, cls.certs,
+                                               cls.scheme, cls.host)
+        cls.server_thread = run_loop_in_thread(cls.io_loop)
 
     @classmethod
     def _stop_server(cls):
-        cls.server_thread.stop()
+        cls.io_loop.add_callback(cls.server.stop)
+        cls.io_loop.add_callback(cls.io_loop.stop)
         cls.server_thread.join()
 
     @classmethod
@@ -87,27 +88,29 @@ class HTTPDummyProxyTestCase(unittest.TestCase):
 
     @classmethod
     def setUpClass(cls):
-        cls.http_thread = TornadoServerThread(host=cls.http_host,
-                                              scheme='http')
-        cls.http_thread._start_server()
-        cls.http_port = cls.http_thread.port
+        cls.io_loop = ioloop.IOLoop()
 
-        cls.https_thread = TornadoServerThread(
-            host=cls.https_host, scheme='https', certs=cls.https_certs)
-        cls.https_thread._start_server()
-        cls.https_port = cls.https_thread.port
+        app = wsgi.WSGIContainer(TestingApp())
+        cls.http_server, cls.http_port = run_tornado_app(
+            app, cls.io_loop, None, 'http', cls.http_host)
 
-        ready_event = threading.Event()
-        cls.proxy_thread = ProxyServerThread(
-            host=cls.proxy_host, ready_event=ready_event)
-        cls.proxy_thread.start()
-        ready_event.wait()
-        cls.proxy_port = cls.proxy_thread.port
+        app = wsgi.WSGIContainer(TestingApp())
+        cls.https_server, cls.https_port = run_tornado_app(
+            app, cls.io_loop, cls.https_certs, 'https', cls.http_host)
+
+        app = web.Application([(r'.*', ProxyHandler)])
+        cls.proxy_server, cls.proxy_port = run_tornado_app(
+            app, cls.io_loop, None, 'http', cls.proxy_host)
+
+        cls.server_thread = run_loop_in_thread(cls.io_loop)
 
     @classmethod
     def tearDownClass(cls):
-        cls.proxy_thread.stop()
-        cls.proxy_thread.join()
+        cls.io_loop.add_callback(cls.http_server.stop)
+        cls.io_loop.add_callback(cls.https_server.stop)
+        cls.io_loop.add_callback(cls.proxy_server.stop)
+        cls.io_loop.add_callback(cls.io_loop.stop)
+        cls.server_thread.join()
 
 
 class IPv6HTTPDummyServerTestCase(HTTPDummyServerTestCase):
@@ -115,7 +118,7 @@ class IPv6HTTPDummyServerTestCase(HTTPDummyServerTestCase):
 
     @classmethod
     def setUpClass(cls):
-        if not has_ipv6:
+        if not socket.has_ipv6:
             raise SkipTest('IPv6 not available')
         else:
             super(IPv6HTTPDummyServerTestCase, cls).setUpClass()
diff --git a/test-requirements.txt b/test-requirements.txt
index f7c3a50..02d70f4 100644
--- a/test-requirements.txt
+++ b/test-requirements.txt
@@ -1,4 +1,4 @@
 nose==1.3
 mock==1.0.1
-tornado==2.4.1
+tornado==3.1.1
 coverage==3.6
diff --git a/test/__init__.py b/test/__init__.py
deleted file mode 100644
index e69de29..0000000
--- a/test/__init__.py
+++ /dev/null
diff --git a/test/benchmark.py b/test/benchmark.py
deleted file mode 100644
index e7049c4..0000000
--- a/test/benchmark.py
+++ /dev/null
@@ -1,77 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Really simple rudimentary benchmark to compare ConnectionPool versus standard
-urllib to demonstrate the usefulness of connection re-using.
-"""
-from __future__ import print_function
-
-import sys
-import time
-import urllib
-
-sys.path.append('../')
-import urllib3
-
-
-# URLs to download. Doesn't matter as long as they're from the same host, so we
-# can take advantage of connection re-using.
-TO_DOWNLOAD = [
-    'http://code.google.com/apis/apps/',
-    'http://code.google.com/apis/base/',
-    'http://code.google.com/apis/blogger/',
-    'http://code.google.com/apis/calendar/',
-    'http://code.google.com/apis/codesearch/',
-    'http://code.google.com/apis/contact/',
-    'http://code.google.com/apis/books/',
-    'http://code.google.com/apis/documents/',
-    'http://code.google.com/apis/finance/',
-    'http://code.google.com/apis/health/',
-    'http://code.google.com/apis/notebook/',
-    'http://code.google.com/apis/picasaweb/',
-    'http://code.google.com/apis/spreadsheets/',
-    'http://code.google.com/apis/webmastertools/',
-    'http://code.google.com/apis/youtube/',
-]
-
-
-def urllib_get(url_list):
-    assert url_list
-    for url in url_list:
-        now = time.time()
-        r = urllib.urlopen(url)
-        elapsed = time.time() - now
-        print("Got in %0.3f: %s" % (elapsed, url))
-
-
-def pool_get(url_list):
-    assert url_list
-    pool = urllib3.connection_from_url(url_list[0])
-    for url in url_list:
-        now = time.time()
-        r = pool.get_url(url)
-        elapsed = time.time() - now
-        print("Got in %0.3fs: %s" % (elapsed, url))
-
-
-if __name__ == '__main__':
-    print("Running pool_get ...")
-    now = time.time()
-    pool_get(TO_DOWNLOAD)
-    pool_elapsed = time.time() - now
-
-    print("Running urllib_get ...")
-    now = time.time()
-    urllib_get(TO_DOWNLOAD)
-    urllib_elapsed = time.time() - now
-
-    print("Completed pool_get in %0.3fs" % pool_elapsed)
-    print("Completed urllib_get in %0.3fs" % urllib_elapsed)
-
-
-"""
-Example results:
-
-Completed pool_get in 1.163s
-Completed urllib_get in 2.318s
-"""
diff --git a/test/test_collections.py b/test/test_collections.py
index b44c58a..4d173ac 100644
--- a/test/test_collections.py
+++ b/test/test_collections.py
@@ -1,6 +1,9 @@
 import unittest
 
-from urllib3._collections import RecentlyUsedContainer as Container
+from urllib3._collections import (
+    HTTPHeaderDict,
+    RecentlyUsedContainer as Container
+)
 from urllib3.packages import six
 xrange = six.moves.xrange
 
@@ -121,5 +124,57 @@ class TestLRUContainer(unittest.TestCase):
 
         self.assertRaises(NotImplementedError, d.__iter__)
 
+
+class TestHTTPHeaderDict(unittest.TestCase):
+    def setUp(self):
+        self.d = HTTPHeaderDict(A='foo')
+        self.d.add('a', 'bar')
+
+    def test_overwriting_with_setitem_replaces(self):
+        d = HTTPHeaderDict()
+
+        d['A'] = 'foo'
+        self.assertEqual(d['a'], 'foo')
+
+        d['a'] = 'bar'
+        self.assertEqual(d['A'], 'bar')
+
+    def test_copy(self):
+        h = self.d.copy()
+        self.assertTrue(self.d is not h)
+        self.assertEqual(self.d, h)
+
+    def test_add(self):
+        d = HTTPHeaderDict()
+
+        d['A'] = 'foo'
+        d.add('a', 'bar')
+
+        self.assertEqual(d['a'], 'foo, bar')
+        self.assertEqual(d['A'], 'foo, bar')
+
+    def test_getlist(self):
+        self.assertEqual(self.d.getlist('a'), ['foo', 'bar'])
+        self.assertEqual(self.d.getlist('A'), ['foo', 'bar'])
+        self.assertEqual(self.d.getlist('b'), [])
+
+    def test_delitem(self):
+        del self.d['a']
+        self.assertFalse('a' in self.d)
+        self.assertFalse('A' in self.d)
+
+    def test_equal(self):
+        b = HTTPHeaderDict({'a': 'foo, bar'})
+        self.assertEqual(self.d, b)
+        c = [('a', 'foo, bar')]
+        self.assertNotEqual(self.d, c)
+
+    def test_len(self):
+        self.assertEqual(len(self.d), 1)
+
+    def test_repr(self):
+        rep = "HTTPHeaderDict({'A': 'foo, bar'})"
+        self.assertEqual(repr(self.d), rep)
+
 if __name__ == '__main__':
     unittest.main()
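The HTTPHeaderDict behavior these tests pin down, condensed into a short sketch (grounded in the class's docstring later in this patch):

    from urllib3._collections import HTTPHeaderDict

    headers = HTTPHeaderDict()
    headers.add('Set-Cookie', 'foo=bar')   # add() appends to an existing field
    headers.add('set-cookie', 'baz=quxx')

    print(headers['SET-COOKIE'])           # 'foo=bar, baz=quxx'
    print(headers.getlist('Set-Cookie'))   # ['foo=bar', 'baz=quxx']

    headers['Set-Cookie'] = 'replaced'     # __setitem__ overwrites all values
    print(headers.getlist('set-cookie'))   # ['replaced']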
diff --git a/test/test_compatibility.py b/test/test_compatibility.py
new file mode 100644
index 0000000..05ee4de
--- /dev/null
+++ b/test/test_compatibility.py
@@ -0,0 +1,23 @@
+import unittest
+import warnings
+
+from urllib3.connection import HTTPConnection
+
+
+class TestVersionCompatibility(unittest.TestCase):
+    def test_connection_strict(self):
+        with warnings.catch_warnings(record=True) as w:
+            warnings.simplefilter("always")
+
+            # strict=True is deprecated in Py33+
+            conn = HTTPConnection('localhost', 12345, strict=True)
+
+            if w:
+                self.fail('HTTPConnection raised warning on strict=True: %r' % w[0].message)
+
+    def test_connection_source_address(self):
+        try:
+            # source_address does not exist in Py26-
+            conn = HTTPConnection('localhost', 12345, source_address='127.0.0.1')
+        except TypeError as e:
+            self.fail('HTTPConnection raised TypeError on source_address: %r' % e)
diff --git a/test/test_connectionpool.py b/test/test_connectionpool.py
index ac1768e..02229cf 100644
--- a/test/test_connectionpool.py
+++ b/test/test_connectionpool.py
@@ -13,10 +13,9 @@ from urllib3.exceptions import (
     HostChangedError,
     MaxRetryError,
     SSLError,
-    ReadTimeoutError,
 )
 
-from socket import error as SocketError, timeout as SocketTimeout
+from socket import error as SocketError
 from ssl import SSLError as BaseSSLError
 
 try:   # Python 3
@@ -39,6 +38,11 @@ class TestConnectionPool(unittest.TestCase):
             ('http://google.com/', 'http://google.com'),
             ('http://google.com/', 'http://google.com/abra/cadabra'),
             ('http://google.com:42/', 'http://google.com:42/abracadabra'),
+            # Test comparison using default ports
+            ('http://google.com:80/', 'http://google.com/abracadabra'),
+            ('http://google.com/', 'http://google.com:80/abracadabra'),
+            ('https://google.com:443/', 'https://google.com/abracadabra'),
+            ('https://google.com/', 'https://google.com:443/abracadabra'),
         ]
 
         for a, b in same_host:
@@ -51,11 +55,22 @@ class TestConnectionPool(unittest.TestCase):
             ('http://yahoo.com/', 'http://google.com/'),
             ('http://google.com:42', 'https://google.com/abracadabra'),
             ('http://google.com', 'https://google.net/'),
+            # Test comparison with default ports
+            ('http://google.com:42', 'http://google.com'),
+            ('https://google.com:42', 'https://google.com'),
+            ('http://google.com:443', 'http://google.com'),
+            ('https://google.com:80', 'https://google.com'),
+            ('http://google.com:443', 'https://google.com'),
+            ('https://google.com:80', 'http://google.com'),
+            ('https://google.com:443', 'http://google.com'),
+            ('http://google.com:80', 'https://google.com'),
         ]
 
         for a, b in not_same_host:
             c = connection_from_url(a)
             self.assertFalse(c.is_same_host(b), "%s =? %s" % (a, b))
+            c = connection_from_url(b)
+            self.assertFalse(c.is_same_host(a), "%s =? %s" % (b, a))
 
     def test_max_connections(self):
@@ -128,9 +143,8 @@ class TestConnectionPool(unittest.TestCase):
 
             self.assertEqual(pool.pool.qsize(), POOL_SIZE)
 
-        #make sure that all of the exceptions return the connection to the pool
-        _test(Empty, ReadTimeoutError)
-        _test(SocketTimeout, ReadTimeoutError)
+        # Make sure that all of the exceptions return the connection to the pool
+        _test(Empty, EmptyPoolError)
         _test(BaseSSLError, SSLError)
         _test(CertificateError, SSLError)
diff --git a/test/test_exceptions.py b/test/test_exceptions.py
index e20649b..4190a61 100644
--- a/test/test_exceptions.py
+++ b/test/test_exceptions.py
@@ -11,25 +11,36 @@ from urllib3.connectionpool import HTTPConnectionPool
 
 class TestPickle(unittest.TestCase):
 
-    def cycle(self, item):
+    def verify_pickling(self, item):
         return pickle.loads(pickle.dumps(item))
 
     def test_exceptions(self):
-        assert self.cycle(HTTPError(None))
-        assert self.cycle(MaxRetryError(None, None, None))
-        assert self.cycle(LocationParseError(None))
-        assert self.cycle(ConnectTimeoutError(None))
+        assert self.verify_pickling(HTTPError(None))
+        assert self.verify_pickling(MaxRetryError(None, None, None))
+        assert self.verify_pickling(LocationParseError(None))
+        assert self.verify_pickling(ConnectTimeoutError(None))
 
     def test_exceptions_with_objects(self):
-        assert self.cycle(HTTPError('foo'))
-        assert self.cycle(MaxRetryError(HTTPConnectionPool('localhost'),
-                                        '/', None))
-        assert self.cycle(LocationParseError('fake location'))
-        assert self.cycle(ClosedPoolError(HTTPConnectionPool('localhost'),
-                                          None))
-        assert self.cycle(EmptyPoolError(HTTPConnectionPool('localhost'),
-                                         None))
-        assert self.cycle(HostChangedError(HTTPConnectionPool('localhost'),
-                                           '/', None))
-        assert self.cycle(ReadTimeoutError(HTTPConnectionPool('localhost'),
-                                           '/', None))
+        assert self.verify_pickling(
+            HTTPError('foo'))
+
+        assert self.verify_pickling(
+            HTTPError('foo', IOError('foo')))
+
+        assert self.verify_pickling(
+            MaxRetryError(HTTPConnectionPool('localhost'), '/', None))
+
+        assert self.verify_pickling(
+            LocationParseError('fake location'))
+
+        assert self.verify_pickling(
+            ClosedPoolError(HTTPConnectionPool('localhost'), None))
+
+        assert self.verify_pickling(
+            EmptyPoolError(HTTPConnectionPool('localhost'), None))
+
+        assert self.verify_pickling(
+            HostChangedError(HTTPConnectionPool('localhost'), '/', None))
+
+        assert self.verify_pickling(
+            ReadTimeoutError(HTTPConnectionPool('localhost'), '/', None))
diff --git a/test/test_fields.py b/test/test_fields.py
index 888c2d5..cdec68b 100644
--- a/test/test_fields.py
+++ b/test/test_fields.py
@@ -1,34 +1,39 @@
 import unittest
 
 from urllib3.fields import guess_content_type, RequestField
-from urllib3.packages.six import b, u
+from urllib3.packages.six import u
 
 
 class TestRequestField(unittest.TestCase):
 
     def test_guess_content_type(self):
-        self.assertEqual(guess_content_type('image.jpg'), 'image/jpeg')
-        self.assertEqual(guess_content_type('notsure'), 'application/octet-stream')
-        self.assertEqual(guess_content_type(None), 'application/octet-stream')
+        self.assertTrue(guess_content_type('image.jpg') in
+                        ['image/jpeg', 'image/pjpeg'])
+        self.assertEqual(guess_content_type('notsure'),
+                         'application/octet-stream')
+        self.assertEqual(guess_content_type(None), 'application/octet-stream')
 
     def test_create(self):
-        simple_field = RequestField('somename', 'data')
-        self.assertEqual(simple_field.render_headers(), '\r\n')
-        filename_field = RequestField('somename', 'data', filename='somefile.txt')
-        self.assertEqual(filename_field.render_headers(), '\r\n')
-        headers_field = RequestField('somename', 'data', headers={'Content-Length': 4})
-        self.assertEqual(headers_field.render_headers(),
-                         'Content-Length: 4\r\n'
-                         '\r\n')
+        simple_field = RequestField('somename', 'data')
+        self.assertEqual(simple_field.render_headers(), '\r\n')
+        filename_field = RequestField('somename', 'data',
+                                      filename='somefile.txt')
+        self.assertEqual(filename_field.render_headers(), '\r\n')
+        headers_field = RequestField('somename', 'data',
+                                     headers={'Content-Length': 4})
+        self.assertEqual(
+            headers_field.render_headers(), 'Content-Length: 4\r\n\r\n')
 
     def test_make_multipart(self):
-        field = RequestField('somename', 'data')
-        field.make_multipart(content_type='image/jpg', content_location='/test')
-        self.assertEqual(field.render_headers(),
-                         'Content-Disposition: form-data; name="somename"\r\n'
-                         'Content-Type: image/jpg\r\n'
-                         'Content-Location: /test\r\n'
-                         '\r\n')
+        field = RequestField('somename', 'data')
+        field.make_multipart(content_type='image/jpg',
+                             content_location='/test')
+        self.assertEqual(
+            field.render_headers(),
+            'Content-Disposition: form-data; name="somename"\r\n'
+            'Content-Type: image/jpg\r\n'
+            'Content-Location: /test\r\n'
+            '\r\n')
 
     def test_render_parts(self):
         field = RequestField('somename', 'data')
diff --git a/test/test_filepost.py b/test/test_filepost.py
index ca33d61..390dbb3 100644
--- a/test/test_filepost.py
+++ b/test/test_filepost.py
@@ -124,7 +124,7 @@ class TestMultipartEncoding(unittest.TestCase):
         encoded, content_type = encode_multipart_formdata(fields,
                                                           boundary=BOUNDARY)
 
-        self.assertEquals(encoded,
+        self.assertEqual(encoded,
             b'--' + b(BOUNDARY) + b'\r\n'
             b'Content-Type: image/jpeg\r\n'
            b'\r\n'
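For context on what the assertion above checks, a small sketch of encode_multipart_formdata with a fixed boundary; the field name and contents are illustrative:

    from urllib3.filepost import encode_multipart_formdata

    fields = [('somename', ('somefile.txt', b'file contents'))]
    encoded, content_type = encode_multipart_formdata(fields,
                                                      boundary='boundary')

    print(content_type)             # multipart/form-data; boundary=boundary
    print(encoded.decode('utf-8'))  # headers and body joined with \r\n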
diff --git a/test/test_response.py b/test/test_response.py
index 90d34eb..ecfcbee 100644
--- a/test/test_response.py
+++ b/test/test_response.py
@@ -5,6 +5,25 @@ from io import BytesIO, BufferedReader
 
 from urllib3.response import HTTPResponse
 from urllib3.exceptions import DecodeError
+
+from base64 import b64decode
+
+# A known random (i.e., not-too-compressible) payload generated with:
+#    "".join(random.choice(string.printable) for i in xrange(512))
+#    .encode("zlib").encode("base64")
+# Randomness in tests == bad, and fixing a seed may not be sufficient.
+ZLIB_PAYLOAD = b64decode(b"""\
+eJwFweuaoQAAANDfineQhiKLUiaiCzvuTEmNNlJGiL5QhnGpZ99z8luQfe1AHoMioB+QSWHQu/L+
+lzd7W5CipqYmeVTBjdgSATdg4l4Z2zhikbuF+EKn69Q0DTpdmNJz8S33odfJoVEexw/l2SS9nFdi
+pis7KOwXzfSqarSo9uJYgbDGrs1VNnQpT9f8zAorhYCEZronZQF9DuDFfNK3Hecc+WHLnZLQptwk
+nufw8S9I43sEwxsT71BiqedHo0QeIrFE01F/4atVFXuJs2yxIOak3bvtXjUKAA6OKnQJ/nNvDGKZ
+Khe5TF36JbnKVjdcL1EUNpwrWVfQpFYJ/WWm2b74qNeSZeQv5/xBhRdOmKTJFYgO96PwrHBlsnLn
+a3l0LwJsloWpMbzByU5WLbRE6X5INFqjQOtIwYz5BAlhkn+kVqJvWM5vBlfrwP42ifonM5yF4ciJ
+auHVks62997mNGOsM7WXNG3P98dBHPo2NhbTvHleL0BI5dus2JY81MUOnK3SGWLH8HeWPa1t5KcW
+S5moAj5HexY/g/F8TctpxwsvyZp38dXeLDjSQvEQIkF7XR3YXbeZgKk3V34KGCPOAeeuQDIgyVhV
+nP4HF2uWHA==""")
+
+
 class TestLegacyResponse(unittest.TestCase):
     def test_getheaders(self):
         headers = {'host': 'example.com'}
@@ -167,6 +186,23 @@ class TestResponse(unittest.TestCase):
         self.assertEqual(next(stream), b'o')
         self.assertRaises(StopIteration, next, stream)
 
+    def test_streaming_tell(self):
+        fp = BytesIO(b'foo')
+        resp = HTTPResponse(fp, preload_content=False)
+        stream = resp.stream(2, decode_content=False)
+
+        position = 0
+
+        position += len(next(stream))
+        self.assertEqual(2, position)
+        self.assertEqual(position, resp.tell())
+
+        position += len(next(stream))
+        self.assertEqual(3, position)
+        self.assertEqual(position, resp.tell())
+
+        self.assertRaises(StopIteration, next, stream)
+
     def test_gzipped_streaming(self):
         import zlib
         compress = zlib.compressobj(6, zlib.DEFLATED, 16 + zlib.MAX_WBITS)
@@ -182,6 +218,78 @@ class TestResponse(unittest.TestCase):
         self.assertEqual(next(stream), b'oo')
         self.assertRaises(StopIteration, next, stream)
 
+    def test_gzipped_streaming_tell(self):
+        import zlib
+        compress = zlib.compressobj(6, zlib.DEFLATED, 16 + zlib.MAX_WBITS)
+        uncompressed_data = b'foo'
+        data = compress.compress(uncompressed_data)
+        data += compress.flush()
+
+        fp = BytesIO(data)
+        resp = HTTPResponse(fp, headers={'content-encoding': 'gzip'},
+                            preload_content=False)
+        stream = resp.stream()
+
+        # Read everything
+        payload = next(stream)
+        self.assertEqual(payload, uncompressed_data)
+
+        self.assertEqual(len(data), resp.tell())
+
+        self.assertRaises(StopIteration, next, stream)
+
+    def test_deflate_streaming_tell_intermediate_point(self):
+        # Ensure that ``tell()`` returns the correct number of bytes when
+        # part-way through streaming compressed content.
+        import zlib
+
+        NUMBER_OF_READS = 10
+
+        class MockCompressedDataReading(BytesIO):
+            """
+            A BytesIO-like reader returning ``payload`` in ``NUMBER_OF_READS``
+            calls to ``read``.
+            """
+
+            def __init__(self, payload, payload_part_size):
+                self.payloads = [
+                    payload[i*payload_part_size:(i+1)*payload_part_size]
+                    for i in range(NUMBER_OF_READS+1)]
+
+                assert b"".join(self.payloads) == payload
+
+            def read(self, _):
+                # Amount is unused.
+                if len(self.payloads) > 0:
+                    return self.payloads.pop(0)
+                return b""
+
+        uncompressed_data = zlib.decompress(ZLIB_PAYLOAD)
+
+        payload_part_size = len(ZLIB_PAYLOAD) // NUMBER_OF_READS
+        fp = MockCompressedDataReading(ZLIB_PAYLOAD, payload_part_size)
+        resp = HTTPResponse(fp, headers={'content-encoding': 'deflate'},
+                            preload_content=False)
+        stream = resp.stream()
+
+        parts_positions = [(part, resp.tell()) for part in stream]
+        end_of_stream = resp.tell()
+
+        self.assertRaises(StopIteration, next, stream)
+
+        parts, positions = zip(*parts_positions)
+
+        # Check that the payload is equal to the uncompressed data
+        payload = b"".join(parts)
+        self.assertEqual(uncompressed_data, payload)
+
+        # Check that the positions in the stream are correct
+        expected = [(i+1)*payload_part_size for i in range(NUMBER_OF_READS)]
+        self.assertEqual(expected, list(positions))
+
+        # Check that the end of the stream is in the correct place
+        self.assertEqual(len(ZLIB_PAYLOAD), end_of_stream)
+
     def test_deflate_streaming(self):
         import zlib
         data = zlib.compress(b'foo')
@@ -244,6 +352,11 @@ class TestResponse(unittest.TestCase):
         self.assertEqual(next(stream), b'o')
         self.assertRaises(StopIteration, next, stream)
 
+    def test_get_case_insensitive_headers(self):
+        headers = {'host': 'example.com'}
+        r = HTTPResponse(headers=headers)
+        self.assertEqual(r.headers.get('host'), 'example.com')
+        self.assertEqual(r.headers.get('Host'), 'example.com')
 
 if __name__ == '__main__':
     unittest.main()
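The streaming tests above reduce to this small sketch of the new HTTPResponse.tell():

    from io import BytesIO
    from urllib3.response import HTTPResponse

    resp = HTTPResponse(BytesIO(b'foobar'), preload_content=False)

    read = 0
    for chunk in resp.stream(2, decode_content=False):
        read += len(chunk)
        # Without content decoding, tell() matches the bytes handed out so far.
        assert resp.tell() == read
    print(read)  # 6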
diff --git a/test/test_util.py b/test/test_util.py
index b465fef..ebd3b5f 100644
--- a/test/test_util.py
+++ b/test/test_util.py
@@ -64,7 +64,7 @@ class TestUtil(unittest.TestCase):
         }
         for url, expected_host in url_host_map.items():
             returned_host = get_host(url)
-            self.assertEquals(returned_host, expected_host)
+            self.assertEqual(returned_host, expected_host)
 
     def test_invalid_host(self):
         # TODO: Add more tests
@@ -85,6 +85,8 @@ class TestUtil(unittest.TestCase):
             'http://google.com/': Url('http', host='google.com', path='/'),
             'http://google.com': Url('http', host='google.com'),
             'http://google.com?foo': Url('http', host='google.com', path='', query='foo'),
+
+            # Path/query/fragment
             '': Url(),
             '/': Url(path='/'),
             '?': Url(path='', query=''),
@@ -93,10 +95,22 @@ class TestUtil(unittest.TestCase):
             '/foo': Url(path='/foo'),
             '/foo?bar=baz': Url(path='/foo', query='bar=baz'),
             '/foo?bar=baz#banana?apple/orange': Url(path='/foo', query='bar=baz', fragment='banana?apple/orange'),
+
+            # Port
+            'http://google.com/': Url('http', host='google.com', path='/'),
+            'http://google.com:80/': Url('http', host='google.com', port=80, path='/'),
+            'http://google.com:/': Url('http', host='google.com', path='/'),
+            'http://google.com:80': Url('http', host='google.com', port=80),
+            'http://google.com:': Url('http', host='google.com'),
+
+            # Auth
+            'http://foo:bar@localhost/': Url('http', auth='foo:bar', host='localhost', path='/'),
+            'http://foo@localhost/': Url('http', auth='foo', host='localhost', path='/'),
+            'http://foo:bar@baz@localhost/': Url('http', auth='foo:bar@baz', host='localhost', path='/'),
         }
         for url, expected_url in url_host_map.items():
             returned_url = parse_url(url)
-            self.assertEquals(returned_url, expected_url)
+            self.assertEqual(returned_url, expected_url)
 
     def test_parse_url_invalid_IPv6(self):
         self.assertRaises(ValueError, parse_url, '[::1')
@@ -115,7 +129,7 @@ class TestUtil(unittest.TestCase):
         }
         for url, expected_request_uri in url_host_map.items():
             returned_url = parse_url(url)
-            self.assertEquals(returned_url.request_uri, expected_request_uri)
+            self.assertEqual(returned_url.request_uri, expected_request_uri)
 
     def test_netloc(self):
         url_netloc_map = {
@@ -126,7 +140,7 @@ class TestUtil(unittest.TestCase):
         }
 
         for url, expected_netloc in url_netloc_map.items():
-            self.assertEquals(parse_url(url).netloc, expected_netloc)
+            self.assertEqual(parse_url(url).netloc, expected_netloc)
 
     def test_make_headers(self):
         self.assertEqual(
@@ -157,6 +171,9 @@ class TestUtil(unittest.TestCase):
             make_headers(basic_auth='foo:bar'),
             {'authorization': 'Basic Zm9vOmJhcg=='})
 
+        self.assertEqual(
+            make_headers(proxy_basic_auth='foo:bar'),
+            {'proxy-authorization': 'Basic Zm9vOmJhcg=='})
 
     def test_split_first(self):
         test_cases = {
@@ -250,6 +267,9 @@ class TestUtil(unittest.TestCase):
             self.assertEqual(timeout.read_timeout, None)
             self.assertEqual(timeout.total, None)
 
+        timeout = Timeout(5)
+        self.assertEqual(timeout.total, 5)
+
     def test_timeout_str(self):
         timeout = Timeout(connect=1, read=2, total=3)
diff --git a/urllib3.egg-info/PKG-INFO b/urllib3.egg-info/PKG-INFO
index a81ab9c..6a4f31a 100644
--- a/urllib3.egg-info/PKG-INFO
+++ b/urllib3.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
-Metadata-Version: 1.0
+Metadata-Version: 1.1
 Name: urllib3
-Version: 1.7.1
+Version: 1.8
 Summary: HTTP library with thread-safe connection pooling, file post, and more.
 Home-page: http://urllib3.readthedocs.org/
 Author: Andrey Petrov
@@ -121,10 +121,62 @@ Description: =======
         Changes
         =======
 
+        1.8 (2014-03-04)
+        ++++++++++++++++
+
+        * Improved url parsing in ``urllib3.util.parse_url`` (properly parse '@' in
+          username, and blank ports like 'hostname:').
+
+        * New ``urllib3.connection`` module which contains all the HTTPConnection
+          objects.
+
+        * Several ``urllib3.util.Timeout``-related fixes. Also changed constructor
+          signature to a more sensible order. [Backwards incompatible]
+          (Issues #252, #262, #263)
+
+        * Use ``backports.ssl_match_hostname`` if it's installed. (Issue #274)
+
+        * Added ``.tell()`` method to ``urllib3.response.HTTPResponse`` which
+          returns the number of bytes read so far. (Issue #277)
+
+        * Support for platforms without threading. (Issue #289)
+
+        * Expand default-port comparison in ``HTTPConnectionPool.is_same_host``
+          to allow a pool with no specified port to be considered equal to an
+          HTTP/HTTPS url with port 80/443 explicitly provided. (Issue #305)
+
+        * Improved default SSL/TLS settings to avoid vulnerabilities.
+          (Issue #309)
+
+        * Fixed ``urllib3.poolmanager.ProxyManager`` not retrying on connect errors.
+          (Issue #310)
+
+        * Disable Nagle's Algorithm on the socket for non-proxies. A subset of requests
+          will send the entire HTTP request ~200 milliseconds faster; however, some of
+          the resulting TCP packets will be smaller. (Issue #254)
+
+        * Increased maximum number of SubjectAltNames in ``urllib3.contrib.pyopenssl``
+          from the default 64 to 1024 in a single certificate. (Issue #318)
+
+        * Headers are now passed and stored as a custom
+          ``urllib3.collections_.HTTPHeaderDict`` object rather than a plain ``dict``.
+          (Issue #329, #333)
+
+        * Headers no longer lose their case on Python 3. (Issue #236)
+
+        * ``urllib3.contrib.pyopenssl`` now uses the operating system's default CA
+          certificates on inject. (Issue #332)
+
+        * Requests with ``retries=False`` will immediately raise any exceptions without
+          wrapping them in ``MaxRetryError``. (Issue #348)
+
+        * Fixed open socket leak with SSL-related failures. (Issue #344, #348)
+
+
         1.7.1 (2013-09-25)
         ++++++++++++++++++
 
-        * Added granular timeout support with new `urllib3.util.Timeout` class.
+        * Added granular timeout support with new ``urllib3.util.Timeout`` class.
           (Issue #231)
 
         * Fixed Python 3.4 support. (Issue #238)
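The parse_url fixes listed in the changelog are easiest to see by example. A sketch using the cases from test/test_util.py above:

    from urllib3.util import parse_url

    # '@' inside the userinfo now parses correctly.
    print(parse_url('http://foo:bar@baz@localhost/').auth)  # 'foo:bar@baz'

    # A blank port like 'hostname:' no longer trips the parser.
    print(parse_url('http://google.com:').port)    # None
    print(parse_url('http://google.com:80').port)  # 80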
diff --git a/urllib3.egg-info/SOURCES.txt b/urllib3.egg-info/SOURCES.txt
index 32759d9..a5170fb 100644
--- a/urllib3.egg-info/SOURCES.txt
+++ b/urllib3.egg-info/SOURCES.txt
@@ -11,9 +11,8 @@ dummyserver/handlers.py
 dummyserver/proxy.py
 dummyserver/server.py
 dummyserver/testcase.py
-test/__init__.py
-test/benchmark.py
 test/test_collections.py
+test/test_compatibility.py
 test/test_connectionpool.py
 test/test_exceptions.py
 test/test_fields.py
@@ -24,6 +23,7 @@ test/test_response.py
 test/test_util.py
 urllib3/__init__.py
 urllib3/_collections.py
+urllib3/connection.py
 urllib3/connectionpool.py
 urllib3/exceptions.py
 urllib3/fields.py
@@ -42,4 +42,5 @@ urllib3/contrib/pyopenssl.py
 urllib3/packages/__init__.py
 urllib3/packages/ordered_dict.py
 urllib3/packages/six.py
-urllib3/packages/ssl_match_hostname/__init__.py
\ No newline at end of file
+urllib3/packages/ssl_match_hostname/__init__.py
+urllib3/packages/ssl_match_hostname/_implementation.py
\ No newline at end of file
diff --git a/urllib3/__init__.py b/urllib3/__init__.py
index eed7006..086387f 100644
--- a/urllib3/__init__.py
+++ b/urllib3/__init__.py
@@ -10,7 +10,7 @@ urllib3 - Thread-safe connection pooling and re-using.
 
 __author__ = 'Andrey Petrov (andrey.petrov@shazow.net)'
 __license__ = 'MIT'
-__version__ = '1.7.1'
+__version__ = '1.8'
 
 from .connectionpool import (
diff --git a/urllib3/_collections.py b/urllib3/_collections.py
index 282b8d5..9cea3a4 100644
--- a/urllib3/_collections.py
+++ b/urllib3/_collections.py
@@ -4,16 +4,26 @@
 # This module is part of urllib3 and is released under
 # the MIT License: http://www.opensource.org/licenses/mit-license.php
 
-from collections import MutableMapping
-from threading import RLock
+from collections import Mapping, MutableMapping
+try:
+    from threading import RLock
+except ImportError:  # Platform-specific: No threads available
+    class RLock:
+        def __enter__(self):
+            pass
+
+        def __exit__(self, exc_type, exc_value, traceback):
+            pass
+
 
 try:  # Python 2.7+
     from collections import OrderedDict
 except ImportError:
     from .packages.ordered_dict import OrderedDict
+from .packages.six import itervalues
 
-__all__ = ['RecentlyUsedContainer']
+__all__ = ['RecentlyUsedContainer', 'HTTPHeaderDict']
 
 
 _Null = object()
@@ -92,3 +102,104 @@ class RecentlyUsedContainer(MutableMapping):
     def keys(self):
         with self.lock:
             return self._container.keys()
+
+
+class HTTPHeaderDict(MutableMapping):
+    """
+    :param headers:
+        An iterable of field-value pairs. Must not contain multiple field names
+        when compared case-insensitively.
+
+    :param kwargs:
+        Additional field-value pairs to pass in to ``dict.update``.
+
+    A ``dict`` like container for storing HTTP Headers.
+
+    Field names are stored and compared case-insensitively in compliance with
+    RFC 2616. Iteration provides the first case-sensitive key seen for each
+    case-insensitive pair.
+
+    Using ``__setitem__`` syntax overwrites fields that compare equal
+    case-insensitively in order to maintain ``dict``'s api. For fields that
+    compare equal, instead create a new ``HTTPHeaderDict`` and use ``.add``
+    in a loop.
+
+    If multiple fields that are equal case-insensitively are passed to the
+    constructor or ``.update``, the behavior is undefined and some will be
+    lost.
+
+    >>> headers = HTTPHeaderDict()
+    >>> headers.add('Set-Cookie', 'foo=bar')
+    >>> headers.add('set-cookie', 'baz=quxx')
+    >>> headers['content-length'] = '7'
+    >>> headers['SET-cookie']
+    'foo=bar, baz=quxx'
+    >>> headers['Content-Length']
+    '7'
+
+    If you want to access the raw headers with their original casing
+    for debugging purposes you can access the private ``._data`` attribute
+    which is a normal python ``dict`` that maps the case-insensitive key to a
+    list of tuples stored as (case-sensitive-original-name, value). Using the
+    structure from above as our example:
+
+    >>> headers._data
+    {'set-cookie': [('Set-Cookie', 'foo=bar'), ('set-cookie', 'baz=quxx')],
+    'content-length': [('content-length', '7')]}
+    """
+
+    def __init__(self, headers=None, **kwargs):
+        self._data = {}
+        if headers is None:
+            headers = {}
+        self.update(headers, **kwargs)
+
+    def add(self, key, value):
+        """Adds a (name, value) pair, doesn't overwrite the value if it already
+        exists.
+
+        >>> headers = HTTPHeaderDict(foo='bar')
+        >>> headers.add('Foo', 'baz')
+        >>> headers['foo']
+        'bar, baz'
+        """
+        self._data.setdefault(key.lower(), []).append((key, value))
+
+    def getlist(self, key):
+        """Returns a list of all the values for the named field. Returns an
+        empty list if the key doesn't exist."""
+        return self[key].split(', ') if key in self else []
+
+    def copy(self):
+        h = HTTPHeaderDict()
+        for key in self._data:
+            for rawkey, value in self._data[key]:
+                h.add(rawkey, value)
+        return h
+
+    def __eq__(self, other):
+        if not isinstance(other, Mapping):
+            return False
+        other = HTTPHeaderDict(other)
+        return dict((k1, self[k1]) for k1 in self._data) == \
+                dict((k2, other[k2]) for k2 in other._data)
+
+    def __getitem__(self, key):
+        values = self._data[key.lower()]
+        return ', '.join(value[1] for value in values)
+
+    def __setitem__(self, key, value):
+        self._data[key.lower()] = [(key, value)]
+
+    def __delitem__(self, key):
+        del self._data[key.lower()]
+
+    def __len__(self):
+        return len(self._data)
+
+    def __iter__(self):
+        for headers in itervalues(self._data):
+            yield headers[0][0]
+
+    def __repr__(self):
+        return '%s(%r)' % (self.__class__.__name__, dict(self.items()))
diff --git a/urllib3/connection.py b/urllib3/connection.py
new file mode 100644
index 0000000..662bd2e
--- /dev/null
+++ b/urllib3/connection.py
@@ -0,0 +1,195 @@
+# urllib3/connection.py
+# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
+#
+# This module is part of urllib3 and is released under
+# the MIT License: http://www.opensource.org/licenses/mit-license.php
+
+import sys
+import socket
+from socket import timeout as SocketTimeout
+
+try:  # Python 3
+    from http.client import HTTPConnection as _HTTPConnection, HTTPException
+except ImportError:
+    from httplib import HTTPConnection as _HTTPConnection, HTTPException
+
+
+class DummyConnection(object):
+    "Used to detect a failed ConnectionCls import."
+    pass
+
+
+try:  # Compiled with SSL?
+    ssl = None
+    HTTPSConnection = DummyConnection
+
+    class BaseSSLError(BaseException):
+        pass
+
+    try:  # Python 3
+        from http.client import HTTPSConnection as _HTTPSConnection
+    except ImportError:
+        from httplib import HTTPSConnection as _HTTPSConnection
+
+    import ssl
+    BaseSSLError = ssl.SSLError
+
+except (ImportError, AttributeError):  # Platform-specific: No SSL.
+    pass
+
+from .exceptions import (
+    ConnectTimeoutError,
+)
+from .packages.ssl_match_hostname import match_hostname
+from .packages import six
+from .util import (
+    assert_fingerprint,
+    resolve_cert_reqs,
+    resolve_ssl_version,
+    ssl_wrap_socket,
+)
+
+
+port_by_scheme = {
+    'http': 80,
+    'https': 443,
+}
+
+
+class HTTPConnection(_HTTPConnection, object):
+    """
+    Based on httplib.HTTPConnection but provides an extra constructor
+    backwards-compatibility layer between older and newer Pythons.
+    """
+
+    default_port = port_by_scheme['http']
+
+    # By default, disable Nagle's Algorithm.
+    tcp_nodelay = 1
+
+    def __init__(self, *args, **kw):
+        if six.PY3:  # Python 3
+            kw.pop('strict', None)
+
+        if sys.version_info < (2, 7):  # Python 2.6 and earlier
+            kw.pop('source_address', None)
+            self.source_address = None
+
+        _HTTPConnection.__init__(self, *args, **kw)
+
+    def _new_conn(self):
+        """ Establish a socket connection and set nodelay settings on it
+
+        :return: a new socket connection
+        """
+        extra_args = []
+        if self.source_address:  # Python 2.7+
+            extra_args.append(self.source_address)
+
+        conn = socket.create_connection(
+            (self.host, self.port),
+            self.timeout,
+            *extra_args
+        )
+        conn.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY,
+                        self.tcp_nodelay)
+        return conn
+
+    def _prepare_conn(self, conn):
+        self.sock = conn
+        if self._tunnel_host:
+            # TODO: Fix tunnel so it doesn't depend on self.sock state.
+            self._tunnel()
+
+    def connect(self):
+        conn = self._new_conn()
+        self._prepare_conn(conn)
+
+
+class HTTPSConnection(HTTPConnection):
+    default_port = port_by_scheme['https']
+
+    def __init__(self, host, port=None, key_file=None, cert_file=None,
+                 strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
+                 source_address=None):
+
+        HTTPConnection.__init__(self, host, port,
+                                strict=strict,
+                                timeout=timeout,
+                                source_address=source_address)
+
+        self.key_file = key_file
+        self.cert_file = cert_file
+
+    def connect(self):
+        conn = self._new_conn()
+        self._prepare_conn(conn)
+        self.sock = ssl.wrap_socket(conn, self.key_file, self.cert_file)
+
+
+class VerifiedHTTPSConnection(HTTPSConnection):
+    """
+    Based on httplib.HTTPSConnection but wraps the socket with
+    SSL certification.
+    """
+    cert_reqs = None
+    ca_certs = None
+    ssl_version = None
+
+    def set_cert(self, key_file=None, cert_file=None,
+                 cert_reqs=None, ca_certs=None,
+                 assert_hostname=None, assert_fingerprint=None):
+
+        self.key_file = key_file
+        self.cert_file = cert_file
+        self.cert_reqs = cert_reqs
+        self.ca_certs = ca_certs
+        self.assert_hostname = assert_hostname
+        self.assert_fingerprint = assert_fingerprint
+
+    def connect(self):
+        # Add certificate verification
+        try:
+            sock = socket.create_connection(
+                address=(self.host, self.port),
+                timeout=self.timeout,
+            )
+        except SocketTimeout:
+            raise ConnectTimeoutError(
+                self, "Connection to %s timed out. (connect timeout=%s)" %
+                (self.host, self.timeout))
+
+        sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY,
+                        self.tcp_nodelay)
+
+        resolved_cert_reqs = resolve_cert_reqs(self.cert_reqs)
+        resolved_ssl_version = resolve_ssl_version(self.ssl_version)
+
+        # the _tunnel_host attribute was added in python 2.6.3 (via
+        # http://hg.python.org/cpython/rev/0f57b30a152f) so pythons 2.6(0-2) do
+        # not have them.
+        if getattr(self, '_tunnel_host', None):
+            self.sock = sock
+            # Calls self._set_hostport(), so self.host is
+            # self._tunnel_host below.
+            self._tunnel()
+
+        # Wrap socket using verification with the root certs in
+        # trusted_root_certs
+        self.sock = ssl_wrap_socket(sock, self.key_file, self.cert_file,
+                                    cert_reqs=resolved_cert_reqs,
+                                    ca_certs=self.ca_certs,
+                                    server_hostname=self.host,
+                                    ssl_version=resolved_ssl_version)
+
+        if resolved_cert_reqs != ssl.CERT_NONE:
+            if self.assert_fingerprint:
+                assert_fingerprint(self.sock.getpeercert(binary_form=True),
+                                   self.assert_fingerprint)
+            elif self.assert_hostname is not False:
+                match_hostname(self.sock.getpeercert(),
+                               self.assert_hostname or self.host)
+
+
+if ssl:
+    # Make a copy for testing.
+    UnverifiedHTTPSConnection = HTTPSConnection
+    HTTPSConnection = VerifiedHTTPSConnection
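A sketch of driving the relocated connection classes directly (pools normally construct these for you); the CA bundle path is an assumption that varies by platform:

    from urllib3.connection import VerifiedHTTPSConnection

    conn = VerifiedHTTPSConnection('example.com', 443)
    # Debian/Ubuntu-style CA bundle; adjust the path for your platform.
    conn.set_cert(cert_reqs='CERT_REQUIRED',
                  ca_certs='/etc/ssl/certs/ca-certificates.crt')
    conn.connect()  # wraps the socket and checks the hostname against the cert

    conn.request('GET', '/')
    print(conn.getresponse().status)
    conn.close()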
- HTTPSConnection = object - - class BaseSSLError(BaseException): - pass - - ssl = None - - try: # Python 3 - from http.client import HTTPSConnection - except ImportError: - from httplib import HTTPSConnection - - import ssl - BaseSSLError = ssl.SSLError - -except (ImportError, AttributeError): # Platform-specific: No SSL. - pass - - from .exceptions import ( ClosedPoolError, + ConnectionError, ConnectTimeoutError, EmptyPoolError, HostChangedError, MaxRetryError, SSLError, + TimeoutError, ReadTimeoutError, ProxyError, ) -from .packages.ssl_match_hostname import CertificateError, match_hostname +from .packages.ssl_match_hostname import CertificateError from .packages import six +from .connection import ( + port_by_scheme, + DummyConnection, + HTTPConnection, HTTPSConnection, VerifiedHTTPSConnection, + HTTPException, BaseSSLError, +) from .request import RequestMethods from .response import HTTPResponse from .util import ( assert_fingerprint, get_host, is_connection_dropped, - resolve_cert_reqs, - resolve_ssl_version, - ssl_wrap_socket, Timeout, ) + xrange = six.moves.xrange log = logging.getLogger(__name__) _Default = object() -port_by_scheme = { - 'http': HTTP_PORT, - 'https': HTTPS_PORT, -} - - -## Connection objects (extension of httplib) - -class VerifiedHTTPSConnection(HTTPSConnection): - """ - Based on httplib.HTTPSConnection but wraps the socket with - SSL certification. - """ - cert_reqs = None - ca_certs = None - ssl_version = None - - def set_cert(self, key_file=None, cert_file=None, - cert_reqs=None, ca_certs=None, - assert_hostname=None, assert_fingerprint=None): - - self.key_file = key_file - self.cert_file = cert_file - self.cert_reqs = cert_reqs - self.ca_certs = ca_certs - self.assert_hostname = assert_hostname - self.assert_fingerprint = assert_fingerprint - - def connect(self): - # Add certificate verification - try: - sock = socket.create_connection( - address=(self.host, self.port), - timeout=self.timeout) - except SocketTimeout: - raise ConnectTimeoutError( - self, "Connection to %s timed out. (connect timeout=%s)" % - (self.host, self.timeout)) - - resolved_cert_reqs = resolve_cert_reqs(self.cert_reqs) - resolved_ssl_version = resolve_ssl_version(self.ssl_version) - - if self._tunnel_host: - self.sock = sock - # Calls self._set_hostport(), so self.host is - # self._tunnel_host below. - self._tunnel() - - # Wrap socket using verification with the root certs in - # trusted_root_certs - self.sock = ssl_wrap_socket(sock, self.key_file, self.cert_file, - cert_reqs=resolved_cert_reqs, - ca_certs=self.ca_certs, - server_hostname=self.host, - ssl_version=resolved_ssl_version) - - if resolved_cert_reqs != ssl.CERT_NONE: - if self.assert_fingerprint: - assert_fingerprint(self.sock.getpeercert(binary_form=True), - self.assert_fingerprint) - elif self.assert_hostname is not False: - match_hostname(self.sock.getpeercert(), - self.assert_hostname or self.host) - - ## Pool objects class ConnectionPool(object): @@ -218,6 +132,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): """ scheme = 'http' + ConnectionCls = HTTPConnection def __init__(self, host, port=None, strict=False, timeout=Timeout.DEFAULT_TIMEOUT, maxsize=1, block=False, @@ -250,19 +165,20 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): def _new_conn(self): """ - Return a fresh :class:`httplib.HTTPConnection`. + Return a fresh :class:`HTTPConnection`. 
""" self.num_connections += 1 log.info("Starting new HTTP connection (%d): %s" % (self.num_connections, self.host)) - extra_params = {} - if not six.PY3: # Python 2 - extra_params['strict'] = self.strict - - return HTTPConnection(host=self.host, port=self.port, - timeout=self.timeout.connect_timeout, - **extra_params) + conn = self.ConnectionCls(host=self.host, port=self.port, + timeout=self.timeout.connect_timeout, + strict=self.strict) + if self.proxy is not None: + # Enable Nagle's algorithm for proxies, to avoid packet + # fragmentation. + conn.tcp_nodelay = 0 + return conn def _get_conn(self, timeout=None): """ @@ -319,8 +235,9 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): pass except Full: # This should never happen if self.block == True - log.warning("HttpConnectionPool is full, discarding connection: %s" - % self.host) + log.warning( + "Connection pool is full, discarding connection: %s" % + self.host) # Connection never got put back into the pool, close it. if conn: @@ -341,7 +258,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): def _make_request(self, conn, method, url, timeout=_Default, **httplib_request_kw): """ - Perform a request on a given httplib connection object taken from our + Perform a request on a given urllib connection object taken from our pool. :param conn: @@ -362,7 +279,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): timeout_obj.start_connect() conn.timeout = timeout_obj.connect_timeout # conn.request() calls httplib.*.request, not the method in - # request.py. It also calls makefile (recv) on the socket + # urllib3.request. It also calls makefile (recv) on the socket. conn.request(method, url, **httplib_request_kw) except SocketTimeout: raise ConnectTimeoutError( @@ -371,11 +288,9 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): # Reset the timeout for the recv() on the socket read_timeout = timeout_obj.read_timeout - log.debug("Setting read timeout to %s" % read_timeout) + # App Engine doesn't have a sock attr - if hasattr(conn, 'sock') and \ - read_timeout is not None and \ - read_timeout is not Timeout.DEFAULT_TIMEOUT: + if hasattr(conn, 'sock'): # In Python 3 socket.py will catch EAGAIN and return None when you # try and read into the file pointer created by http.client, which # instead raises a BadStatusLine exception. Instead of catching @@ -385,7 +300,10 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): raise ReadTimeoutError( self, url, "Read timed out. (read timeout=%s)" % read_timeout) - conn.sock.settimeout(read_timeout) + if read_timeout is Timeout.DEFAULT_TIMEOUT: + conn.sock.settimeout(socket.getdefaulttimeout()) + else: # None or a value + conn.sock.settimeout(read_timeout) # Receive the response from the server try: @@ -397,6 +315,16 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): raise ReadTimeoutError( self, url, "Read timed out. (read timeout=%s)" % read_timeout) + except BaseSSLError as e: + # Catch possible read timeouts thrown as SSL errors. If not the + # case, rethrow the original. We need to do this because of: + # http://bugs.python.org/issue10272 + if 'timed out' in str(e) or \ + 'did not complete (read)' in str(e): # Python 2.6 + raise ReadTimeoutError(self, url, "Read timed out.") + + raise + except SocketError as e: # Platform-specific: Python 2 # See the above comment about EAGAIN in Python 3. 
In Python 2 we # have to specifically catch it and throw the timeout error @@ -404,8 +332,8 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): raise ReadTimeoutError( self, url, "Read timed out. (read timeout=%s)" % read_timeout) - raise + raise # AppEngine doesn't have a version attr. http_version = getattr(conn, '_http_vsn_str', 'HTTP/?') @@ -441,9 +369,11 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): # TODO: Add optional support for socket.gethostbyname checking. scheme, host, port = get_host(url) + # Use explicit default port for comparison when none is given if self.port and not port: - # Use explicit default port for comparison when none is given. port = port_by_scheme.get(scheme) + elif not self.port and port == port_by_scheme.get(scheme): + port = None return (scheme, host, port) == (self.scheme, self.host, self.port) @@ -482,10 +412,13 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): :param retries: Number of retries to allow before raising a MaxRetryError exception. + If ``False``, then retries are disabled and any exception is raised + immediately. :param redirect: If True, automatically handle redirects (status codes 301, 302, - 303, 307, 308). Each redirect counts as a retry. + 303, 307, 308). Each redirect counts as a retry. Disabling retries + will disable redirects, too. :param assert_same_host: If ``True``, will make sure that the host of the pool requests is @@ -519,7 +452,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): if headers is None: headers = self.headers - if retries < 0: + if retries < 0 and retries is not False: raise MaxRetryError(self, url) if release_conn is None: @@ -531,6 +464,17 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): conn = None + # Merge the proxy headers. Only do this in HTTP. We have to copy the + # headers dict so we can safely change it without those changes being + # reflected in anyone else's copy. + if self.scheme == 'http': + headers = headers.copy() + headers.update(self.proxy_headers) + + # Must keep the exception bound to a separate variable or else Python 3 + # complains about UnboundLocalError. + err = None + try: # Request a connection from the queue conn = self._get_conn(timeout=pool_timeout) @@ -558,38 +502,41 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): # ``response.read()``) except Empty: - # Timed out by queue - raise ReadTimeoutError( - self, url, "Read timed out, no pool connections are available.") + # Timed out by queue. + raise EmptyPoolError(self, "No pool connections are available.") - except SocketTimeout: - # Timed out by socket - raise ReadTimeoutError(self, url, "Read timed out.") - - except BaseSSLError as e: - # SSL certificate error - if 'timed out' in str(e) or \ - 'did not complete (read)' in str(e): # Platform-specific: Python 2.6 - raise ReadTimeoutError(self, url, "Read timed out.") + except (BaseSSLError, CertificateError) as e: + # Release connection unconditionally because there is no way to + # close it externally in case of exception. + release_conn = True raise SSLError(e) - except CertificateError as e: - # Name mismatch - raise SSLError(e) + except (TimeoutError, HTTPException, SocketError) as e: + if conn: + # Discard the connection for these exceptions. It will + # be replaced during the next _get_conn() call. + conn.close() + conn = None - except (HTTPException, SocketError) as e: - if isinstance(e, SocketError) and self.proxy is not None: - raise ProxyError('Cannot connect to proxy. ' 'Socket error: %s.'
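The ``retries=False`` contract documented above means failures surface directly rather than wrapped in ``MaxRetryError``. A quick sketch, assuming nothing is listening on the chosen port::

    from urllib3 import HTTPConnectionPool
    from urllib3.exceptions import ConnectionError

    pool = HTTPConnectionPool('localhost', port=9)  # assumed unreachable
    try:
        pool.urlopen('GET', '/', retries=False)
    except ConnectionError as e:
        # With retries=False the underlying socket error is re-raised as
        # ConnectionError instead of being wrapped in MaxRetryError.
        print(e)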
% e) + if not retries: + if isinstance(e, TimeoutError): + # TimeoutError is exempt from MaxRetryError-wrapping. + # FIXME: ... Not sure why. Add a reason here. + raise - # Connection broken, discard. It will be replaced next _get_conn(). - conn = None - # This is necessary so we can access e below - err = e + # Wrap unexpected exceptions with the most appropriate + # module-level exception and re-raise. + if isinstance(e, SocketError) and self.proxy: + raise ProxyError('Cannot connect to proxy.', e) + + if retries is False: + raise ConnectionError('Connection failed.', e) - if retries == 0: raise MaxRetryError(self, url, e) + # Keep track of the error for the retry warning. + err = e + finally: if release_conn: # Put the connection back to be reused. If the connection is @@ -599,8 +546,8 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): if not conn: # Try again - log.warn("Retrying (%d attempts remain) after connection " - "broken by '%r': %s" % (retries, err, url)) + log.warning("Retrying (%d attempts remain) after connection " + "broken by '%r': %s" % (retries, err, url)) return self.urlopen(method, url, body, headers, retries - 1, redirect, assert_same_host, timeout=timeout, pool_timeout=pool_timeout, @@ -608,7 +555,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): # Handle redirect? redirect_location = redirect and response.get_redirect_location() - if redirect_location: + if redirect_location and retries is not False: if response.status == 303: method = 'GET' log.info("Redirecting %s -> %s" % (url, redirect_location)) @@ -626,7 +573,7 @@ class HTTPSConnectionPool(HTTPConnectionPool): When Python is compiled with the :mod:`ssl` module, then :class:`.VerifiedHTTPSConnection` is used, which *can* verify certificates, - instead of :class:`httplib.HTTPSConnection`. + instead of :class:`.HTTPSConnection`. :class:`.VerifiedHTTPSConnection` uses one of ``assert_fingerprint``, ``assert_hostname`` and ``host`` in this order to verify connections. @@ -639,6 +586,7 @@ class HTTPSConnectionPool(HTTPConnectionPool): """ scheme = 'https' + ConnectionCls = HTTPSConnection def __init__(self, host, port=None, strict=False, timeout=None, maxsize=1, @@ -658,33 +606,33 @@ class HTTPSConnectionPool(HTTPConnectionPool): self.assert_hostname = assert_hostname self.assert_fingerprint = assert_fingerprint - def _prepare_conn(self, connection): + def _prepare_conn(self, conn): """ Prepare the ``connection`` for :meth:`urllib3.util.ssl_wrap_socket` and establish the tunnel if proxy is used. 
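A sketch of the verification knobs this pool wires into ``set_cert()``; the CA bundle path is illustrative and varies by operating system::

    from urllib3 import HTTPSConnectionPool

    pool = HTTPSConnectionPool(
        'example.com', port=443,
        cert_reqs='CERT_REQUIRED',
        ca_certs='/etc/ssl/certs/ca-certificates.crt')  # path varies by OS
    r = pool.request('GET', '/')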
""" - if isinstance(connection, VerifiedHTTPSConnection): - connection.set_cert(key_file=self.key_file, - cert_file=self.cert_file, - cert_reqs=self.cert_reqs, - ca_certs=self.ca_certs, - assert_hostname=self.assert_hostname, - assert_fingerprint=self.assert_fingerprint) - connection.ssl_version = self.ssl_version + if isinstance(conn, VerifiedHTTPSConnection): + conn.set_cert(key_file=self.key_file, + cert_file=self.cert_file, + cert_reqs=self.cert_reqs, + ca_certs=self.ca_certs, + assert_hostname=self.assert_hostname, + assert_fingerprint=self.assert_fingerprint) + conn.ssl_version = self.ssl_version if self.proxy is not None: # Python 2.7+ try: - set_tunnel = connection.set_tunnel + set_tunnel = conn.set_tunnel except AttributeError: # Platform-specific: Python 2.6 - set_tunnel = connection._set_tunnel + set_tunnel = conn._set_tunnel set_tunnel(self.host, self.port, self.proxy_headers) # Establish tunnel connection early, because otherwise httplib # would improperly set Host: header to proxy's IP:port. - connection.connect() + conn.connect() - return connection + return conn def _new_conn(self): """ @@ -694,28 +642,30 @@ class HTTPSConnectionPool(HTTPConnectionPool): log.info("Starting new HTTPS connection (%d): %s" % (self.num_connections, self.host)) + if not self.ConnectionCls or self.ConnectionCls is DummyConnection: + # Platform-specific: Python without ssl + raise SSLError("Can't connect to HTTPS URL because the SSL " + "module is not available.") + actual_host = self.host actual_port = self.port if self.proxy is not None: actual_host = self.proxy.host actual_port = self.proxy.port - if not ssl: # Platform-specific: Python compiled without +ssl - if not HTTPSConnection or HTTPSConnection is object: - raise SSLError("Can't connect to HTTPS URL because the SSL " - "module is not available.") - connection_class = HTTPSConnection - else: - connection_class = VerifiedHTTPSConnection - extra_params = {} if not six.PY3: # Python 2 extra_params['strict'] = self.strict - connection = connection_class(host=actual_host, port=actual_port, - timeout=self.timeout.connect_timeout, - **extra_params) - return self._prepare_conn(connection) + conn = self.ConnectionCls(host=actual_host, port=actual_port, + timeout=self.timeout.connect_timeout, + **extra_params) + if self.proxy is not None: + # Enable Nagle's algorithm for proxies, to avoid packet + # fragmentation. + conn.tcp_nodelay = 0 + + return self._prepare_conn(conn) def connection_from_url(url, **kw): diff --git a/urllib3/contrib/pyopenssl.py b/urllib3/contrib/pyopenssl.py index d43bcd6..7c513f3 100644 --- a/urllib3/contrib/pyopenssl.py +++ b/urllib3/contrib/pyopenssl.py @@ -1,4 +1,4 @@ -'''SSL with SNI-support for Python 2. +'''SSL with SNI_-support for Python 2. This needs the following packages installed: @@ -18,17 +18,36 @@ your application begins using ``urllib3``, like this:: Now you can use :mod:`urllib3` as you normally would, and it will support SNI when the required modules are installed. + +Activating this module also has the positive side effect of disabling SSL/TLS +encryption in Python 2 (see `CRIME attack`_). + +If you want to configure the default list of supported cipher suites, you can +set the ``urllib3.contrib.pyopenssl.DEFAULT_SSL_CIPHER_LIST`` variable. + +Module Variables +---------------- + +:var DEFAULT_SSL_CIPHER_LIST: The list of supported SSL/TLS cipher suites. + Default: ``ECDH+AESGCM:DH+AESGCM:ECDH+AES256:DH+AES256:ECDH+AES128:DH+AES: + ECDH+3DES:DH+3DES:RSA+AESGCM:RSA+AES:RSA+3DES:!aNULL:!MD5:!DSS`` + +.. 
_sni: https://en.wikipedia.org/wiki/Server_Name_Indication +.. _crime attack: https://en.wikipedia.org/wiki/CRIME_(security_exploit) + ''' from ndg.httpsclient.ssl_peer_verification import SUBJ_ALT_NAME_SUPPORT -from ndg.httpsclient.subj_alt_name import SubjectAltName +from ndg.httpsclient.subj_alt_name import SubjectAltName as BaseSubjectAltName import OpenSSL.SSL from pyasn1.codec.der import decoder as der_decoder -from socket import _fileobject +from pyasn1.type import univ, constraint +from socket import _fileobject, timeout import ssl +import select from cStringIO import StringIO -from .. import connectionpool +from .. import connection from .. import util __all__ = ['inject_into_urllib3', 'extract_from_urllib3'] @@ -49,26 +68,54 @@ _openssl_verify = { + OpenSSL.SSL.VERIFY_FAIL_IF_NO_PEER_CERT, } +# A secure default. +# Sources for more information on TLS ciphers: +# +# - https://wiki.mozilla.org/Security/Server_Side_TLS +# - https://www.ssllabs.com/projects/best-practices/index.html +# - https://hynek.me/articles/hardening-your-web-servers-ssl-ciphers/ +# +# The general intent is: +# - Prefer cipher suites that offer perfect forward secrecy (DHE/ECDHE), +# - prefer ECDHE over DHE for better performance, +# - prefer any AES-GCM over any AES-CBC for better performance and security, +# - use 3DES as fallback which is secure but slow, +# - disable NULL authentication, MD5 MACs and DSS for security reasons. +DEFAULT_SSL_CIPHER_LIST = "ECDH+AESGCM:DH+AESGCM:ECDH+AES256:DH+AES256:" + \ + "ECDH+AES128:DH+AES:ECDH+3DES:DH+3DES:RSA+AESGCM:RSA+AES:RSA+3DES:" + \ + "!aNULL:!MD5:!DSS" + orig_util_HAS_SNI = util.HAS_SNI -orig_connectionpool_ssl_wrap_socket = connectionpool.ssl_wrap_socket +orig_connection_ssl_wrap_socket = connection.ssl_wrap_socket def inject_into_urllib3(): 'Monkey-patch urllib3 with PyOpenSSL-backed SSL-support.' - connectionpool.ssl_wrap_socket = ssl_wrap_socket + connection.ssl_wrap_socket = ssl_wrap_socket util.HAS_SNI = HAS_SNI def extract_from_urllib3(): 'Undo monkey-patching by :func:`inject_into_urllib3`.' - connectionpool.ssl_wrap_socket = orig_connectionpool_ssl_wrap_socket + connection.ssl_wrap_socket = orig_connection_ssl_wrap_socket util.HAS_SNI = orig_util_HAS_SNI ### Note: This is a slightly bug-fixed version of same from ndg-httpsclient. +class SubjectAltName(BaseSubjectAltName): + '''ASN.1 implementation for subjectAltNames support''' + + # There is no limit to how many subjectAltName entries a certificate may + # have; however, this needs some limit, so we'll set an arbitrarily high + # one. + sizeSpec = univ.SequenceOf.sizeSpec + \ + constraint.ValueSizeConstraint(1, 1024) + + +### Note: This is a slightly bug-fixed version of same from ndg-httpsclient. def get_subj_alt_name(peer_cert): # Search through extensions dns_name = [] @@ -101,6 +148,13 @@ def get_subj_alt_name(peer_cert): class fileobject(_fileobject): + def _wait_for_sock(self): + rd, wd, ed = select.select([self._sock], [], [], + self._sock.gettimeout()) + if not rd: + raise timeout() + + def read(self, size=-1): # Use max, disallow tiny reads in a loop as they are very inefficient.
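Wiring the module in follows the docstring above; a sketch (the shortened cipher string is illustrative, not a recommendation)::

    from urllib3.contrib import pyopenssl

    # Optional: narrow the cipher suites before injecting.
    pyopenssl.DEFAULT_SSL_CIPHER_LIST = 'ECDH+AESGCM:DH+AESGCM:!aNULL:!MD5'
    pyopenssl.inject_into_urllib3()

    import urllib3
    http = urllib3.PoolManager()
    # ... use http as usual; undo the monkey-patching with:
    pyopenssl.extract_from_urllib3()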
# We never leave read() with any leftover data from a new recv() call @@ -118,6 +172,7 @@ class fileobject(_fileobject): try: data = self._sock.recv(rbufsize) except OpenSSL.SSL.WantReadError: + self._wait_for_sock() continue if not data: break @@ -145,6 +200,7 @@ class fileobject(_fileobject): try: data = self._sock.recv(left) except OpenSSL.SSL.WantReadError: + self._wait_for_sock() continue if not data: break @@ -196,6 +252,7 @@ class fileobject(_fileobject): break buffers.append(data) except OpenSSL.SSL.WantReadError: + self._wait_for_sock() continue break return "".join(buffers) @@ -206,6 +263,7 @@ class fileobject(_fileobject): try: data = self._sock.recv(self._rbufsize) except OpenSSL.SSL.WantReadError: + self._wait_for_sock() continue if not data: break @@ -233,7 +291,8 @@ class fileobject(_fileobject): try: data = self._sock.recv(self._rbufsize) except OpenSSL.SSL.WantReadError: - continue + self._wait_for_sock() + continue if not data: break left = size - buf_len @@ -328,6 +387,15 @@ def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, ctx.load_verify_locations(ca_certs, None) except OpenSSL.SSL.Error as e: raise ssl.SSLError('bad ca_certs: %r' % ca_certs, e) + else: + ctx.set_default_verify_paths() + + # Disable TLS compression to mitigate CRIME attack (issue #309) + OP_NO_COMPRESSION = 0x20000 + ctx.set_options(OP_NO_COMPRESSION) + + # Set the list of supported cipher suites. + ctx.set_cipher_list(DEFAULT_SSL_CIPHER_LIST) cnx = OpenSSL.SSL.Connection(ctx, sock) cnx.set_tlsext_host_name(server_hostname) @@ -336,6 +404,7 @@ def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, try: cnx.do_handshake() except OpenSSL.SSL.WantReadError: + select.select([sock], [], []) continue except OpenSSL.SSL.Error as e: raise ssl.SSLError('bad handshake', e) diff --git a/urllib3/exceptions.py b/urllib3/exceptions.py index 98ef9ab..b4df831 100644 --- a/urllib3/exceptions.py +++ b/urllib3/exceptions.py @@ -44,6 +44,11 @@ class ProxyError(HTTPError): pass +class ConnectionError(HTTPError): + "Raised when a normal connection fails." + pass + + class DecodeError(HTTPError): "Raised when automatic decoding based on Content-Type fails." pass diff --git a/urllib3/filepost.py b/urllib3/filepost.py index 4575582..e8b30bd 100644 --- a/urllib3/filepost.py +++ b/urllib3/filepost.py @@ -46,16 +46,15 @@ def iter_field_objects(fields): def iter_fields(fields): """ - Iterate over fields. + .. deprecated:: 1.6 - .. deprecated :: + Iterate over fields. - The addition of `~urllib3.fields.RequestField` makes this function - obsolete. Instead, use :func:`iter_field_objects`, which returns - `~urllib3.fields.RequestField` objects, instead. + The addition of :class:`~urllib3.fields.RequestField` makes this function + obsolete. Instead, use :func:`iter_field_objects`, which returns + :class:`~urllib3.fields.RequestField` objects. Supports list of (k, v) tuples and dicts.
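The ``_wait_for_sock()`` calls added above are the standard remedy for ``WantReadError`` on a non-blocking (Py)OpenSSL socket: block in ``select`` until the descriptor is readable, then retry the ``recv``. The same idea as a standalone sketch::

    import select
    import socket

    def wait_readable(sock, timeout=None):
        # Block until `sock` has data to read, or raise on timeout,
        # mirroring fileobject._wait_for_sock() above.
        ready, _, _ = select.select([sock], [], [], timeout)
        if not ready:
            raise socket.timeout()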
- """ if isinstance(fields, dict): return ((k, v) for k, v in six.iteritems(fields)) diff --git a/urllib3/packages/ssl_match_hostname/__init__.py b/urllib3/packages/ssl_match_hostname/__init__.py index 2d61ac2..dd59a75 100644 --- a/urllib3/packages/ssl_match_hostname/__init__.py +++ b/urllib3/packages/ssl_match_hostname/__init__.py @@ -1,98 +1,13 @@ -"""The match_hostname() function from Python 3.2, essential when using SSL.""" - -import re - -__version__ = '3.2.2' - -class CertificateError(ValueError): - pass - -def _dnsname_match(dn, hostname, max_wildcards=1): - """Matching according to RFC 6125, section 6.4.3 - - http://tools.ietf.org/html/rfc6125#section-6.4.3 - """ - pats = [] - if not dn: - return False - - parts = dn.split(r'.') - leftmost = parts[0] - - wildcards = leftmost.count('*') - if wildcards > max_wildcards: - # Issue #17980: avoid denials of service by refusing more - # than one wildcard per fragment. A survery of established - # policy among SSL implementations showed it to be a - # reasonable choice. - raise CertificateError( - "too many wildcards in certificate DNS name: " + repr(dn)) - - # speed up common case w/o wildcards - if not wildcards: - return dn.lower() == hostname.lower() - - # RFC 6125, section 6.4.3, subitem 1. - # The client SHOULD NOT attempt to match a presented identifier in which - # the wildcard character comprises a label other than the left-most label. - if leftmost == '*': - # When '*' is a fragment by itself, it matches a non-empty dotless - # fragment. - pats.append('[^.]+') - elif leftmost.startswith('xn--') or hostname.startswith('xn--'): - # RFC 6125, section 6.4.3, subitem 3. - # The client SHOULD NOT attempt to match a presented identifier - # where the wildcard character is embedded within an A-label or - # U-label of an internationalized domain name. - pats.append(re.escape(leftmost)) - else: - # Otherwise, '*' matches any dotless string, e.g. www* - pats.append(re.escape(leftmost).replace(r'\*', '[^.]*')) - - # add the remaining fragments, ignore any wildcards - for frag in parts[1:]: - pats.append(re.escape(frag)) - - pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE) - return pat.match(hostname) - - -def match_hostname(cert, hostname): - """Verify that *cert* (in decoded format as returned by - SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 and RFC 6125 - rules are followed, but IP addresses are not accepted for *hostname*. - - CertificateError is raised on failure. On success, the function - returns nothing. - """ - if not cert: - raise ValueError("empty or no certificate") - dnsnames = [] - san = cert.get('subjectAltName', ()) - for key, value in san: - if key == 'DNS': - if _dnsname_match(value, hostname): - return - dnsnames.append(value) - if not dnsnames: - # The subject is only checked when there is no dNSName entry - # in subjectAltName - for sub in cert.get('subject', ()): - for key, value in sub: - # XXX according to RFC 2818, the most specific Common Name - # must be used. 
- if key == 'commonName': - if _dnsname_match(value, hostname): - return - dnsnames.append(value) - if len(dnsnames) > 1: - raise CertificateError("hostname %r " - "doesn't match either of %s" - % (hostname, ', '.join(map(repr, dnsnames)))) - elif len(dnsnames) == 1: - raise CertificateError("hostname %r " - "doesn't match %r" - % (hostname, dnsnames[0])) - else: - raise CertificateError("no appropriate commonName or " - "subjectAltName fields were found") +try: + # Python 3.2+ + from ssl import CertificateError, match_hostname +except ImportError: + try: + # Backport of the function from a pypi module + from backports.ssl_match_hostname import CertificateError, match_hostname + except ImportError: + # Our vendored copy + from ._implementation import CertificateError, match_hostname + +# Not needed, but documenting what we provide. +__all__ = ('CertificateError', 'match_hostname') diff --git a/urllib3/packages/ssl_match_hostname/_implementation.py b/urllib3/packages/ssl_match_hostname/_implementation.py new file mode 100644 index 0000000..52f4287 --- /dev/null +++ b/urllib3/packages/ssl_match_hostname/_implementation.py @@ -0,0 +1,105 @@ +"""The match_hostname() function from Python 3.3.3, essential when using SSL.""" + +# Note: This file is under the PSF license as the code comes from the python +# stdlib. http://docs.python.org/3/license.html + +import re + +__version__ = '3.4.0.2' + +class CertificateError(ValueError): + pass + + +def _dnsname_match(dn, hostname, max_wildcards=1): + """Matching according to RFC 6125, section 6.4.3 + + http://tools.ietf.org/html/rfc6125#section-6.4.3 + """ + pats = [] + if not dn: + return False + + # Ported from python3-syntax: + # leftmost, *remainder = dn.split(r'.') + parts = dn.split(r'.') + leftmost = parts[0] + remainder = parts[1:] + + wildcards = leftmost.count('*') + if wildcards > max_wildcards: + # Issue #17980: avoid denials of service by refusing more + # than one wildcard per fragment. A survey of established + # policy among SSL implementations showed it to be a + # reasonable choice. + raise CertificateError( + "too many wildcards in certificate DNS name: " + repr(dn)) + + # speed up common case w/o wildcards + if not wildcards: + return dn.lower() == hostname.lower() + + # RFC 6125, section 6.4.3, subitem 1. + # The client SHOULD NOT attempt to match a presented identifier in which + # the wildcard character comprises a label other than the left-most label. + if leftmost == '*': + # When '*' is a fragment by itself, it matches a non-empty dotless + # fragment. + pats.append('[^.]+') + elif leftmost.startswith('xn--') or hostname.startswith('xn--'): + # RFC 6125, section 6.4.3, subitem 3. + # The client SHOULD NOT attempt to match a presented identifier + # where the wildcard character is embedded within an A-label or + # U-label of an internationalized domain name. + pats.append(re.escape(leftmost)) + else: + # Otherwise, '*' matches any dotless string, e.g. www* + pats.append(re.escape(leftmost).replace(r'\*', '[^.]*')) + + # add the remaining fragments, ignore any wildcards + for frag in remainder: + pats.append(re.escape(frag)) + + pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE) + return pat.match(hostname) + + +def match_hostname(cert, hostname): + """Verify that *cert* (in decoded format as returned by + SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 and RFC 6125 + rules are followed, but IP addresses are not accepted for *hostname*. + + CertificateError is raised on failure. 
On success, the function + returns nothing. + """ + if not cert: + raise ValueError("empty or no certificate") + dnsnames = [] + san = cert.get('subjectAltName', ()) + for key, value in san: + if key == 'DNS': + if _dnsname_match(value, hostname): + return + dnsnames.append(value) + if not dnsnames: + # The subject is only checked when there is no dNSName entry + # in subjectAltName + for sub in cert.get('subject', ()): + for key, value in sub: + # XXX according to RFC 2818, the most specific Common Name + # must be used. + if key == 'commonName': + if _dnsname_match(value, hostname): + return + dnsnames.append(value) + if len(dnsnames) > 1: + raise CertificateError("hostname %r " + "doesn't match either of %s" + % (hostname, ', '.join(map(repr, dnsnames)))) + elif len(dnsnames) == 1: + raise CertificateError("hostname %r " + "doesn't match %r" + % (hostname, dnsnames[0])) + else: + raise CertificateError("no appropriate commonName or " + "subjectAltName fields were found") diff --git a/urllib3/poolmanager.py b/urllib3/poolmanager.py index e7f8667..f18ff2b 100644 --- a/urllib3/poolmanager.py +++ b/urllib3/poolmanager.py @@ -1,5 +1,5 @@ # urllib3/poolmanager.py -# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# Copyright 2008-2014 Andrey Petrov and contributors (see CONTRIBUTORS.txt) # # This module is part of urllib3 and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php @@ -176,7 +176,7 @@ class ProxyManager(PoolManager): Behaves just like :class:`PoolManager`, but sends all requests through the defined proxy, using the CONNECT method for HTTPS URLs. - :param poxy_url: + :param proxy_url: The URL of the proxy to be used. :param proxy_headers: @@ -245,12 +245,11 @@ class ProxyManager(PoolManager): u = parse_url(url) if u.scheme == "http": - # It's too late to set proxy headers on per-request basis for - # tunnelled HTTPS connections, should use - # constructor's proxy_headers instead. + # For proxied HTTPS requests, httplib sets the necessary headers + # on the CONNECT to the proxy. For HTTP, we'll definitely + # need to set 'Host' at the very least. 
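With the corrected ``proxy_url`` parameter and the header handling above, typical usage looks like this sketch (proxy address hypothetical; ``proxy_basic_auth`` is the ``make_headers`` option added to util.py later in this diff)::

    from urllib3 import ProxyManager
    from urllib3.util import make_headers

    proxy = ProxyManager(
        'http://localhost:3128/',
        proxy_headers=make_headers(proxy_basic_auth='user:secret'))
    # Plain HTTP goes through the proxy directly; HTTPS is tunnelled
    # via CONNECT, with the proxy headers sent on the CONNECT request.
    r = proxy.request('GET', 'http://example.com/')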
kw['headers'] = self._set_proxy_headers(url, kw.get('headers', self.headers)) - kw['headers'].update(self.proxy_headers) return super(ProxyManager, self).urlopen(method, url, redirect, **kw) diff --git a/urllib3/request.py b/urllib3/request.py index 66a9a0e..2a92cc2 100644 --- a/urllib3/request.py +++ b/urllib3/request.py @@ -45,7 +45,6 @@ class RequestMethods(object): """ _encode_url_methods = set(['DELETE', 'GET', 'HEAD', 'OPTIONS']) - _encode_body_methods = set(['PATCH', 'POST', 'PUT', 'TRACE']) def __init__(self, headers=None): self.headers = headers or {} diff --git a/urllib3/response.py b/urllib3/response.py index 4efff5a..db44182 100644 --- a/urllib3/response.py +++ b/urllib3/response.py @@ -9,6 +9,7 @@ import logging import zlib import io +from ._collections import HTTPHeaderDict from .exceptions import DecodeError from .packages.six import string_types as basestring, binary_type from .util import is_fp_closed @@ -79,7 +80,10 @@ class HTTPResponse(io.IOBase): def __init__(self, body='', headers=None, status=0, version=0, reason=None, strict=0, preload_content=True, decode_content=True, original_response=None, pool=None, connection=None): - self.headers = headers or {} + + self.headers = HTTPHeaderDict() + if headers: + self.headers.update(headers) self.status = status self.version = version self.reason = reason @@ -90,6 +94,7 @@ class HTTPResponse(io.IOBase): self._body = body if body and isinstance(body, basestring) else None self._fp = None self._original_response = original_response + self._fp_bytes_read = 0 self._pool = pool self._connection = connection @@ -129,6 +134,14 @@ class HTTPResponse(io.IOBase): if self._fp: return self.read(cache_content=True) + def tell(self): + """ + Obtain the number of bytes pulled over the wire so far. May differ from + the amount of content returned by :meth:`HTTPResponse.read` if bytes + are encoded on the wire (e.g., compressed). + """ + return self._fp_bytes_read + def read(self, amt=None, decode_content=None, cache_content=False): """ Similar to :meth:`httplib.HTTPResponse.read`, but with two additional @@ -183,6 +196,8 @@ class HTTPResponse(io.IOBase): self._fp.close() flush_decoder = True + self._fp_bytes_read += len(data) + try: if decode_content and self._decoder: data = self._decoder.decompress(data) @@ -238,17 +253,9 @@ class HTTPResponse(io.IOBase): with ``original_response=r``. """ - # Normalize headers between different versions of Python - headers = {} + headers = HTTPHeaderDict() for k, v in r.getheaders(): - # Python 3: Header keys are returned capitalised - k = k.lower() - - has_value = headers.get(k) - if has_value: # Python 3: Repeating header keys are unmerged. - v = ', '.join([has_value, v]) - - headers[k] = v + headers.add(k, v) # HTTPResponse objects in Python 3 don't have a .strict attribute strict = getattr(r, 'strict', 0) diff --git a/urllib3/util.py b/urllib3/util.py index 266c9ed..bd26631 100644 --- a/urllib3/util.py +++ b/urllib3/util.py @@ -80,14 +80,13 @@ class Timeout(object): :type read: integer, float, or None :param total: - The maximum amount of time to wait for an HTTP request to connect and - return. This combines the connect and read timeouts into one. In the + This combines the connect and read timeouts into one; the read timeout + will be set to the time left over from the connect attempt. In the event that both a connect timeout and a total are specified, or a read timeout and a total are specified, the shorter timeout will be applied. Defaults to None. - :type total: integer, float, or None
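To make the ``total`` semantics concrete, a sketch using the reordered constructor from this diff (values illustrative)::

    from urllib3 import HTTPConnectionPool
    from urllib3.util import Timeout

    # total=10 caps connect + read together; connect alone still
    # fails after 3 seconds.
    timeout = Timeout(total=10.0, connect=3.0)
    pool = HTTPConnectionPool('example.com', timeout=timeout)
    r = pool.request('GET', '/')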
.. note:: @@ -101,18 +100,23 @@ class Timeout(object): `total`. In addition, the read and total timeouts only measure the time between - read operations on the socket connecting the client and the server, not - the total amount of time for the request to return a complete response. - As an example, you may want a request to return within 7 seconds or - fail, so you set the ``total`` timeout to 7 seconds. If the server - sends one byte to you every 5 seconds, the request will **not** trigger - time out. This case is admittedly rare. + read operations on the socket connecting the client and the server, + not the total amount of time for the request to return a complete + response. For most requests, the timeout is raised because the server + has not sent the first byte in the specified time. This is not always + the case; if a server streams one byte every fifteen seconds, a timeout + of 20 seconds will never trigger, even though the request will + take several minutes to complete. + + If your goal is to cut off any request after a set amount of wall clock + time, consider having a second "watcher" thread to cut off a slow + request. """ #: A sentinel object representing the default timeout value DEFAULT_TIMEOUT = _GLOBAL_DEFAULT_TIMEOUT - def __init__(self, connect=_Default, read=_Default, total=None): + def __init__(self, total=None, connect=_Default, read=_Default): self._connect = self._validate_timeout(connect, 'connect') self._read = self._validate_timeout(read, 'read') self.total = self._validate_timeout(total, 'total') @@ -372,7 +376,8 @@ def parse_url(url): # Auth if '@' in url: - auth, url = url.split('@', 1) + # Last '@' denotes end of auth part + auth, url = url.rsplit('@', 1) # IPv6 if url and url[0] == '[': @@ -386,10 +391,14 @@ def parse_url(url): if not host: host = _host - if not port.isdigit(): - raise LocationParseError("Failed to parse: %s" % url) - - port = int(port) + if port: + # If given, ports must be integers. + if not port.isdigit(): + raise LocationParseError("Failed to parse: %s" % url) + port = int(port) + else: + # Blank ports are cool, too. (rfc3986#section-3.2.3) + port = None elif not host and url: host = url @@ -417,7 +426,7 @@ def get_host(url): def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, - basic_auth=None): + basic_auth=None, proxy_basic_auth=None): """ Shortcuts for generating request headers. @@ -438,6 +447,10 @@ def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, Colon-separated username:password string for 'authorization: basic ...' auth header. + :param proxy_basic_auth: + Colon-separated username:password string for 'proxy-authorization: basic ...' + auth header. + Example: :: >>> make_headers(keep_alive=True, user_agent="Batman/1.0") @@ -465,6 +478,10 @@ def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, headers['authorization'] = 'Basic ' + \ b64encode(six.b(basic_auth)).decode('utf-8') + if proxy_basic_auth: + headers['proxy-authorization'] = 'Basic ' + \ + b64encode(six.b(proxy_basic_auth)).decode('utf-8') + return headers @@ -603,6 +620,11 @@ if SSLContext is not None: # Python 3.2+ """ context = SSLContext(ssl_version) context.verify_mode = cert_reqs + + # Disable TLS compression to mitigate CRIME attack (issue #309) + OP_NO_COMPRESSION = 0x20000 + context.options |= OP_NO_COMPRESSION + if ca_certs: try: context.load_verify_locations(ca_certs)
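For the response-side changes (``HTTPHeaderDict`` storage and the new ``tell()``), a sketch; the URL is illustrative::

    import urllib3

    http = urllib3.PoolManager()
    r = http.request('GET', 'http://example.com/', preload_content=False)
    chunk = r.read(1024)
    # tell() counts bytes pulled over the wire, which can differ from
    # len(chunk) when the body is compressed in transit.
    print(r.tell(), len(chunk))

    # Header lookups are case-insensitive, and original casing is
    # preserved on Python 3.
    print(r.headers['Content-Type'] == r.headers['content-type'])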
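And the util.py parsing and header changes in action, as a sketch (values illustrative)::

    from urllib3.util import make_headers, parse_url

    # The last '@' now delimits the auth part, so '@' may appear in it.
    print(parse_url('http://user:p@ss@example.com/').host)  # example.com

    # A blank port parses as None instead of raising LocationParseError.
    print(parse_url('http://example.com:/').port)  # None

    # New shortcut for Proxy-Authorization headers.
    print(make_headers(proxy_basic_auth='user:secret'))
    # {'proxy-authorization': 'Basic dXNlcjpzZWNyZXQ='}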