From 52980ebd0a4eb75acf055a2256e095772c1fa7c6 Mon Sep 17 00:00:00 2001 From: SVN-Git Migration Date: Thu, 8 Oct 2015 13:19:35 -0700 Subject: Imported Upstream version 1.7.1 --- CHANGES.rst | 48 ++++ CONTRIBUTORS.txt | 30 +++ PKG-INFO | 70 +++++- README.rst | 18 +- dummyserver/handlers.py | 57 ++++- dummyserver/proxy.py | 137 ++++++++++++ dummyserver/server.py | 50 +++-- dummyserver/testcase.py | 86 +++++-- setup.cfg | 1 + test-requirements.txt | 5 +- test/__init__.py | 0 test/benchmark.py | 77 +++++++ test/test_connectionpool.py | 29 ++- test/test_exceptions.py | 30 ++- test/test_fields.py | 44 ++++ test/test_filepost.py | 14 ++ test/test_proxymanager.py | 18 +- test/test_response.py | 135 ++++++++++- test/test_util.py | 155 +++++++++++-- urllib3.egg-info/PKG-INFO | 70 +++++- urllib3.egg-info/SOURCES.txt | 5 + urllib3/__init__.py | 4 +- urllib3/_collections.py | 16 +- urllib3/connectionpool.py | 286 ++++++++++++++++++------ urllib3/contrib/ntlmpool.py | 2 +- urllib3/contrib/pyopenssl.py | 193 +++++++++++++++- urllib3/exceptions.py | 54 ++++- urllib3/fields.py | 177 +++++++++++++++ urllib3/filepost.py | 57 ++--- urllib3/packages/ssl_match_hostname/__init__.py | 67 ++++-- urllib3/poolmanager.py | 121 +++++++--- urllib3/request.py | 2 +- urllib3/response.py | 72 +++++- urllib3/util.py | 260 ++++++++++++++++++++- 34 files changed, 2112 insertions(+), 278 deletions(-) create mode 100755 dummyserver/proxy.py create mode 100644 test/__init__.py create mode 100644 test/benchmark.py create mode 100644 test/test_fields.py create mode 100644 urllib3/fields.py diff --git a/CHANGES.rst b/CHANGES.rst index a2a0da8..891fd79 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,54 @@ Changes ======= +1.7.1 (2013-09-25) +++++++++++++++++++ + +* Added granular timeout support with new `urllib3.util.Timeout` class. + (Issue #231) + +* Fixed Python 3.4 support. (Issue #238) + + +1.7 (2013-08-14) +++++++++++++++++ + +* More exceptions are now pickle-able, with tests. (Issue #174) + +* Fixed redirecting with relative URLs in Location header. (Issue #178) + +* Support for relative urls in ``Location: ...`` header. (Issue #179) + +* ``urllib3.response.HTTPResponse`` now inherits from ``io.IOBase`` for bonus + file-like functionality. (Issue #187) + +* Passing ``assert_hostname=False`` when creating a HTTPSConnectionPool will + skip hostname verification for SSL connections. (Issue #194) + +* New method ``urllib3.response.HTTPResponse.stream(...)`` which acts as a + generator wrapped around ``.read(...)``. (Issue #198) + +* IPv6 url parsing enforces brackets around the hostname. (Issue #199) + +* Fixed thread race condition in + ``urllib3.poolmanager.PoolManager.connection_from_host(...)`` (Issue #204) + +* ``ProxyManager`` requests now include non-default port in ``Host: ...`` + header. (Issue #217) + +* Added HTTPS proxy support in ``ProxyManager``. (Issue #170 #139) + +* New ``RequestField`` object can be passed to the ``fields=...`` param which + can specify headers. (Issue #220) + +* Raise ``urllib3.exceptions.ProxyError`` when connecting to proxy fails. + (Issue #221) + +* Use international headers when posting file names. (Issue #119) + +* Improved IPv6 support. (Issue #203) + + 1.6 (2013-04-25) ++++++++++++++++ diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt index 19f59ce..e1aca42 100644 --- a/CONTRIBUTORS.txt +++ b/CONTRIBUTORS.txt @@ -60,5 +60,35 @@ In chronological order: * Marc Schlaich * Various bugfixes and test improvements. +* Bryce Boe + * Correct six.moves conflict + * Fixed pickle support of some exceptions + +* Boris Figovsky + * Allowed to skip SSL hostname verification + +* Cory Benfield + * Stream method for Response objects. + * Return native strings in header values. + * Generate 'Host' header when using proxies. + +* Jason Robinson + * Add missing WrappedSocket.fileno method in PyOpenSSL + +* Audrius Butkevicius + * Fixed a race condition + +* Stanislav Vitkovskiy + * Added HTTPS (CONNECT) proxy support + +* Stephen Holsapple + * Added abstraction for granular control of request fields + +* Martin von Gagern + * Support for non-ASCII header parameters + +* Kevin Burke and Pavel Kirichenko + * Support for separate connect and request timeouts + * [Your name or handle] <[email or website]> * [Brief summary of your changes] diff --git a/PKG-INFO b/PKG-INFO index 661e33b..a81ab9c 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,6 +1,6 @@ -Metadata-Version: 1.1 +Metadata-Version: 1.0 Name: urllib3 -Version: 1.6 +Version: 1.7.1 Summary: HTTP library with thread-safe connection pooling, file post, and more. Home-page: http://urllib3.readthedocs.org/ Author: Andrey Petrov @@ -25,7 +25,7 @@ Description: ======= - Supports gzip and deflate decoding. - Thread-safe and sanity-safe. - Works with AppEngine, gevent, and eventlib. - - Tested on Python 2.6+ and Python 3.3+, 100% unit test coverage. + - Tested on Python 2.6+ and Python 3.2+, 100% unit test coverage. - Small and easy to understand codebase perfect for extending and building upon. For a more comprehensive solution, have a look at `Requests `_ which is also powered by urllib3. @@ -89,14 +89,18 @@ Description: ======= Run the tests ============= - We use some external dependencies to run the urllib3 test suite. Easiest way to - run the tests is thusly from the urllib3 source root: :: + We use some external dependencies, multiple interpreters and code coverage + analysis while running test suite. Easiest way to run the tests is thusly the + ``tox`` utility: :: - $ pip install -r test-requirements.txt - $ nosetests - ..................................................... + $ tox + # [..] + py26: commands succeeded + py27: commands succeeded + py32: commands succeeded + py33: commands succeeded - Success! You could also ``pip install coverage`` to get code coverage reporting. + Note that code coverage less than 100% is regarded as a failing run. Contributing @@ -117,6 +121,54 @@ Description: ======= Changes ======= + 1.7.1 (2013-09-25) + ++++++++++++++++++ + + * Added granular timeout support with new `urllib3.util.Timeout` class. + (Issue #231) + + * Fixed Python 3.4 support. (Issue #238) + + + 1.7 (2013-08-14) + ++++++++++++++++ + + * More exceptions are now pickle-able, with tests. (Issue #174) + + * Fixed redirecting with relative URLs in Location header. (Issue #178) + + * Support for relative urls in ``Location: ...`` header. (Issue #179) + + * ``urllib3.response.HTTPResponse`` now inherits from ``io.IOBase`` for bonus + file-like functionality. (Issue #187) + + * Passing ``assert_hostname=False`` when creating a HTTPSConnectionPool will + skip hostname verification for SSL connections. (Issue #194) + + * New method ``urllib3.response.HTTPResponse.stream(...)`` which acts as a + generator wrapped around ``.read(...)``. (Issue #198) + + * IPv6 url parsing enforces brackets around the hostname. (Issue #199) + + * Fixed thread race condition in + ``urllib3.poolmanager.PoolManager.connection_from_host(...)`` (Issue #204) + + * ``ProxyManager`` requests now include non-default port in ``Host: ...`` + header. (Issue #217) + + * Added HTTPS proxy support in ``ProxyManager``. (Issue #170 #139) + + * New ``RequestField`` object can be passed to the ``fields=...`` param which + can specify headers. (Issue #220) + + * Raise ``urllib3.exceptions.ProxyError`` when connecting to proxy fails. + (Issue #221) + + * Use international headers when posting file names. (Issue #119) + + * Improved IPv6 support. (Issue #203) + + 1.6 (2013-04-25) ++++++++++++++++ diff --git a/README.rst b/README.rst index 75f05d8..b126647 100644 --- a/README.rst +++ b/README.rst @@ -17,7 +17,7 @@ Highlights - Supports gzip and deflate decoding. - Thread-safe and sanity-safe. - Works with AppEngine, gevent, and eventlib. -- Tested on Python 2.6+ and Python 3.3+, 100% unit test coverage. +- Tested on Python 2.6+ and Python 3.2+, 100% unit test coverage. - Small and easy to understand codebase perfect for extending and building upon. For a more comprehensive solution, have a look at `Requests `_ which is also powered by urllib3. @@ -81,14 +81,18 @@ pools, you should look at Run the tests ============= -We use some external dependencies to run the urllib3 test suite. Easiest way to -run the tests is thusly from the urllib3 source root: :: +We use some external dependencies, multiple interpreters and code coverage +analysis while running test suite. Easiest way to run the tests is thusly the +``tox`` utility: :: - $ pip install -r test-requirements.txt - $ nosetests - ..................................................... + $ tox + # [..] + py26: commands succeeded + py27: commands succeeded + py32: commands succeeded + py33: commands succeeded -Success! You could also ``pip install coverage`` to get code coverage reporting. +Note that code coverage less than 100% is regarded as a failing run. Contributing diff --git a/dummyserver/handlers.py b/dummyserver/handlers.py index ab48b53..bc51f31 100644 --- a/dummyserver/handlers.py +++ b/dummyserver/handlers.py @@ -87,7 +87,7 @@ class TestingApp(WSGIHandler): if request.method != method: return Response("Wrong method: %s != %s" % - (method, request.method), status='400') + (method, request.method), status='400 Bad Request') return Response() def upload(self, request): @@ -100,17 +100,18 @@ class TestingApp(WSGIHandler): if len(files_) != 1: return Response("Expected 1 file for '%s', not %d" %(param, len(files_)), - status='400') + status='400 Bad Request') file_ = files_[0] data = file_['body'] if int(size) != len(data): return Response("Wrong size: %d != %d" % - (size, len(data)), status='400') + (size, len(data)), status='400 Bad Request') if filename != file_['filename']: return Response("Wrong filename: %s != %s" % - (filename, file_.filename), status='400') + (filename, file_.filename), + status='400 Bad Request') return Response() @@ -118,7 +119,7 @@ class TestingApp(WSGIHandler): "Perform a redirect to ``target``" target = request.params.get('target', '/') headers = [('Location', target)] - return Response(status='303', headers=headers) + return Response(status='303 See Other', headers=headers) def keepalive(self, request): if request.params.get('close', b'0') == b'1': @@ -169,3 +170,49 @@ class TestingApp(WSGIHandler): def shutdown(self, request): sys.exit() + + +# RFC2231-aware replacement of internal tornado function +def _parse_header(line): + r"""Parse a Content-type like header. + + Return the main content-type and a dictionary of options. + + >>> d = _parse_header("CD: fd; foo=\"bar\"; file*=utf-8''T%C3%A4st")[1] + >>> d['file'] == 'T\u00e4st' + True + >>> d['foo'] + 'bar' + """ + import tornado.httputil + import email.utils + from urllib3.packages import six + if not six.PY3: + line = line.encode('utf-8') + parts = tornado.httputil._parseparam(';' + line) + key = next(parts) + # decode_params treats first argument special, but we already stripped key + params = [('Dummy', 'value')] + for p in parts: + i = p.find('=') + if i >= 0: + name = p[:i].strip().lower() + value = p[i + 1:].strip() + params.append((name, value)) + params = email.utils.decode_params(params) + params.pop(0) # get rid of the dummy again + pdict = {} + for name, value in params: + print(repr(value)) + value = email.utils.collapse_rfc2231_value(value) + if len(value) >= 2 and value[0] == '"' and value[-1] == '"': + value = value[1:-1] + pdict[name] = value + return key, pdict + +# TODO: make the following conditional as soon as we know a version +# which does not require this fix. +# See https://github.com/facebook/tornado/issues/868 +if True: + import tornado.httputil + tornado.httputil._parse_header = _parse_header diff --git a/dummyserver/proxy.py b/dummyserver/proxy.py new file mode 100755 index 0000000..aca92a7 --- /dev/null +++ b/dummyserver/proxy.py @@ -0,0 +1,137 @@ +#!/usr/bin/env python +# +# Simple asynchronous HTTP proxy with tunnelling (CONNECT). +# +# GET/POST proxying based on +# http://groups.google.com/group/python-tornado/msg/7bea08e7a049cf26 +# +# Copyright (C) 2012 Senko Rasic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +import sys +import socket + +import tornado.httpserver +import tornado.ioloop +import tornado.iostream +import tornado.web +import tornado.httpclient + +__all__ = ['ProxyHandler', 'run_proxy'] + + +class ProxyHandler(tornado.web.RequestHandler): + SUPPORTED_METHODS = ['GET', 'POST', 'CONNECT'] + + @tornado.web.asynchronous + def get(self): + + def handle_response(response): + if response.error and not isinstance(response.error, + tornado.httpclient.HTTPError): + self.set_status(500) + self.write('Internal server error:\n' + str(response.error)) + self.finish() + else: + self.set_status(response.code) + for header in ('Date', 'Cache-Control', 'Server', + 'Content-Type', 'Location'): + v = response.headers.get(header) + if v: + self.set_header(header, v) + if response.body: + self.write(response.body) + self.finish() + + req = tornado.httpclient.HTTPRequest(url=self.request.uri, + method=self.request.method, body=self.request.body, + headers=self.request.headers, follow_redirects=False, + allow_nonstandard_methods=True) + + client = tornado.httpclient.AsyncHTTPClient() + try: + client.fetch(req, handle_response) + except tornado.httpclient.HTTPError as e: + if hasattr(e, 'response') and e.response: + self.handle_response(e.response) + else: + self.set_status(500) + self.write('Internal server error:\n' + str(e)) + self.finish() + + @tornado.web.asynchronous + def post(self): + return self.get() + + @tornado.web.asynchronous + def connect(self): + host, port = self.request.uri.split(':') + client = self.request.connection.stream + + def read_from_client(data): + upstream.write(data) + + def read_from_upstream(data): + client.write(data) + + def client_close(data=None): + if upstream.closed(): + return + if data: + upstream.write(data) + upstream.close() + + def upstream_close(data=None): + if client.closed(): + return + if data: + client.write(data) + client.close() + + def start_tunnel(): + client.read_until_close(client_close, read_from_client) + upstream.read_until_close(upstream_close, read_from_upstream) + client.write(b'HTTP/1.0 200 Connection established\r\n\r\n') + + s = socket.socket(socket.AF_INET, socket.SOCK_STREAM, 0) + upstream = tornado.iostream.IOStream(s) + upstream.connect((host, int(port)), start_tunnel) + + +def run_proxy(port, start_ioloop=True): + """ + Run proxy on the specified port. If start_ioloop is True (default), + the tornado IOLoop will be started immediately. + """ + app = tornado.web.Application([ + (r'.*', ProxyHandler), + ]) + app.listen(port) + ioloop = tornado.ioloop.IOLoop.instance() + if start_ioloop: + ioloop.start() + +if __name__ == '__main__': + port = 8888 + if len(sys.argv) > 1: + port = int(sys.argv[1]) + + print ("Starting HTTP proxy on port %d" % port) + run_proxy(port) diff --git a/dummyserver/server.py b/dummyserver/server.py index 9031664..f4f98a4 100755 --- a/dummyserver/server.py +++ b/dummyserver/server.py @@ -11,11 +11,14 @@ import sys import threading import socket +from tornado import netutil import tornado.wsgi import tornado.httpserver import tornado.ioloop +import tornado.web from dummyserver.handlers import TestingApp +from dummyserver.proxy import ProxyHandler log = logging.getLogger(__name__) @@ -36,28 +39,29 @@ class SocketServerThread(threading.Thread): """ :param socket_handler: Callable which receives a socket argument for one request. - :param ready_lock: Lock which gets released when the socket handler is + :param ready_event: Event which gets set when the socket handler is ready to receive requests. """ def __init__(self, socket_handler, host='localhost', port=8081, - ready_lock=None): + ready_event=None): threading.Thread.__init__(self) self.socket_handler = socket_handler self.host = host - self.port = port - self.ready_lock = ready_lock + self.ready_event = ready_event def _start_server(self): sock = socket.socket() - sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) - sock.bind((self.host, self.port)) + if sys.platform != 'win32': + sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + sock.bind((self.host, 0)) + self.port = sock.getsockname()[1] # Once listen() returns, the server socket is ready sock.listen(1) - if self.ready_lock: - self.ready_lock.release() + if self.ready_event: + self.ready_event.set() self.socket_handler(sock) sock.close() @@ -67,34 +71,44 @@ class SocketServerThread(threading.Thread): class TornadoServerThread(threading.Thread): - def __init__(self, host='localhost', port=8081, scheme='http', certs=None): + app = tornado.wsgi.WSGIContainer(TestingApp()) + + def __init__(self, host='localhost', scheme='http', certs=None, + ready_event=None): threading.Thread.__init__(self) self.host = host - self.port = port self.scheme = scheme self.certs = certs + self.ready_event = ready_event def _start_server(self): - container = tornado.wsgi.WSGIContainer(TestingApp()) - if self.scheme == 'https': - http_server = tornado.httpserver.HTTPServer(container, + http_server = tornado.httpserver.HTTPServer(self.app, ssl_options=self.certs) else: - http_server = tornado.httpserver.HTTPServer(container) + http_server = tornado.httpserver.HTTPServer(self.app) - http_server.listen(self.port, address=self.host) + family = socket.AF_INET6 if ':' in self.host else socket.AF_INET + sock, = netutil.bind_sockets(None, address=self.host, family=family) + self.port = sock.getsockname()[1] + http_server.add_sockets([sock]) return http_server def run(self): - self.server = self._start_server() self.ioloop = tornado.ioloop.IOLoop.instance() + self.server = self._start_server() + if self.ready_event: + self.ready_event.set() self.ioloop.start() def stop(self): - self.server.stop() - self.ioloop.stop() + self.ioloop.add_callback(self.server.stop) + self.ioloop.add_callback(self.ioloop.stop) + + +class ProxyServerThread(TornadoServerThread): + app = tornado.web.Application([(r'.*', ProxyHandler)]) if __name__ == '__main__': diff --git a/dummyserver/testcase.py b/dummyserver/testcase.py index 73b8f2f..a2a1da1 100644 --- a/dummyserver/testcase.py +++ b/dummyserver/testcase.py @@ -1,14 +1,15 @@ import unittest - -from threading import Lock +import socket +import threading +from nose.plugins.skip import SkipTest from dummyserver.server import ( TornadoServerThread, SocketServerThread, DEFAULT_CERTS, + ProxyServerThread, ) - -# TODO: Change ports to auto-allocated? +has_ipv6 = hasattr(socket, 'has_ipv6') class SocketDummyServerTestCase(unittest.TestCase): @@ -18,19 +19,16 @@ class SocketDummyServerTestCase(unittest.TestCase): """ scheme = 'http' host = 'localhost' - port = 18080 @classmethod def _start_server(cls, socket_handler): - ready_lock = Lock() - ready_lock.acquire() + ready_event = threading.Event() cls.server_thread = SocketServerThread(socket_handler=socket_handler, - ready_lock=ready_lock, - host=cls.host, port=cls.port) + ready_event=ready_event, + host=cls.host) cls.server_thread.start() - - # Lock gets released by thread above - ready_lock.acquire() + ready_event.wait() + cls.port = cls.server_thread.port @classmethod def tearDownClass(cls): @@ -41,20 +39,19 @@ class SocketDummyServerTestCase(unittest.TestCase): class HTTPDummyServerTestCase(unittest.TestCase): scheme = 'http' host = 'localhost' - host_alt = '127.0.0.1' # Some tests need two hosts - port = 18081 + host_alt = '127.0.0.1' # Some tests need two hosts certs = DEFAULT_CERTS @classmethod def _start_server(cls): - cls.server_thread = TornadoServerThread(host=cls.host, port=cls.port, + ready_event = threading.Event() + cls.server_thread = TornadoServerThread(host=cls.host, scheme=cls.scheme, - certs=cls.certs) + certs=cls.certs, + ready_event=ready_event) cls.server_thread.start() - - # TODO: Loop-check here instead - import time - time.sleep(0.1) + ready_event.wait() + cls.port = cls.server_thread.port @classmethod def _stop_server(cls): @@ -73,5 +70,52 @@ class HTTPDummyServerTestCase(unittest.TestCase): class HTTPSDummyServerTestCase(HTTPDummyServerTestCase): scheme = 'https' host = 'localhost' - port = 18082 certs = DEFAULT_CERTS + + +class HTTPDummyProxyTestCase(unittest.TestCase): + + http_host = 'localhost' + http_host_alt = '127.0.0.1' + + https_host = 'localhost' + https_host_alt = '127.0.0.1' + https_certs = DEFAULT_CERTS + + proxy_host = 'localhost' + proxy_host_alt = '127.0.0.1' + + @classmethod + def setUpClass(cls): + cls.http_thread = TornadoServerThread(host=cls.http_host, + scheme='http') + cls.http_thread._start_server() + cls.http_port = cls.http_thread.port + + cls.https_thread = TornadoServerThread( + host=cls.https_host, scheme='https', certs=cls.https_certs) + cls.https_thread._start_server() + cls.https_port = cls.https_thread.port + + ready_event = threading.Event() + cls.proxy_thread = ProxyServerThread( + host=cls.proxy_host, ready_event=ready_event) + cls.proxy_thread.start() + ready_event.wait() + cls.proxy_port = cls.proxy_thread.port + + @classmethod + def tearDownClass(cls): + cls.proxy_thread.stop() + cls.proxy_thread.join() + + +class IPv6HTTPDummyServerTestCase(HTTPDummyServerTestCase): + host = '::1' + + @classmethod + def setUpClass(cls): + if not has_ipv6: + raise SkipTest('IPv6 not available') + else: + super(IPv6HTTPDummyServerTestCase, cls).setUpClass() diff --git a/setup.cfg b/setup.cfg index 8f6983c..8f1fee7 100644 --- a/setup.cfg +++ b/setup.cfg @@ -3,6 +3,7 @@ logging-clear-handlers = true with-coverage = true cover-package = urllib3 cover-min-percentage = 100 +cover-erase = true [egg_info] tag_build = diff --git a/test-requirements.txt b/test-requirements.txt index 226c13d..f7c3a50 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -1,3 +1,4 @@ -nose +nose==1.3 +mock==1.0.1 tornado==2.4.1 -coverage +coverage==3.6 diff --git a/test/__init__.py b/test/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test/benchmark.py b/test/benchmark.py new file mode 100644 index 0000000..e7049c4 --- /dev/null +++ b/test/benchmark.py @@ -0,0 +1,77 @@ +#!/usr/bin/env python + +""" +Really simple rudimentary benchmark to compare ConnectionPool versus standard +urllib to demonstrate the usefulness of connection re-using. +""" +from __future__ import print_function + +import sys +import time +import urllib + +sys.path.append('../') +import urllib3 + + +# URLs to download. Doesn't matter as long as they're from the same host, so we +# can take advantage of connection re-using. +TO_DOWNLOAD = [ + 'http://code.google.com/apis/apps/', + 'http://code.google.com/apis/base/', + 'http://code.google.com/apis/blogger/', + 'http://code.google.com/apis/calendar/', + 'http://code.google.com/apis/codesearch/', + 'http://code.google.com/apis/contact/', + 'http://code.google.com/apis/books/', + 'http://code.google.com/apis/documents/', + 'http://code.google.com/apis/finance/', + 'http://code.google.com/apis/health/', + 'http://code.google.com/apis/notebook/', + 'http://code.google.com/apis/picasaweb/', + 'http://code.google.com/apis/spreadsheets/', + 'http://code.google.com/apis/webmastertools/', + 'http://code.google.com/apis/youtube/', +] + + +def urllib_get(url_list): + assert url_list + for url in url_list: + now = time.time() + r = urllib.urlopen(url) + elapsed = time.time() - now + print("Got in %0.3f: %s" % (elapsed, url)) + + +def pool_get(url_list): + assert url_list + pool = urllib3.connection_from_url(url_list[0]) + for url in url_list: + now = time.time() + r = pool.get_url(url) + elapsed = time.time() - now + print("Got in %0.3fs: %s" % (elapsed, url)) + + +if __name__ == '__main__': + print("Running pool_get ...") + now = time.time() + pool_get(TO_DOWNLOAD) + pool_elapsed = time.time() - now + + print("Running urllib_get ...") + now = time.time() + urllib_get(TO_DOWNLOAD) + urllib_elapsed = time.time() - now + + print("Completed pool_get in %0.3fs" % pool_elapsed) + print("Completed urllib_get in %0.3fs" % urllib_elapsed) + + +""" +Example results: + +Completed pool_get in 1.163s +Completed urllib_get in 2.318s +""" diff --git a/test/test_connectionpool.py b/test/test_connectionpool.py index a7e104a..ac1768e 100644 --- a/test/test_connectionpool.py +++ b/test/test_connectionpool.py @@ -1,6 +1,11 @@ import unittest -from urllib3.connectionpool import connection_from_url, HTTPConnectionPool +from urllib3.connectionpool import ( + connection_from_url, + HTTPConnection, + HTTPConnectionPool, +) +from urllib3.util import Timeout from urllib3.packages.ssl_match_hostname import CertificateError from urllib3.exceptions import ( ClosedPoolError, @@ -8,7 +13,7 @@ from urllib3.exceptions import ( HostChangedError, MaxRetryError, SSLError, - TimeoutError, + ReadTimeoutError, ) from socket import error as SocketError, timeout as SocketTimeout @@ -52,6 +57,7 @@ class TestConnectionPool(unittest.TestCase): c = connection_from_url(a) self.assertFalse(c.is_same_host(b), "%s =? %s" % (a, b)) + def test_max_connections(self): pool = HTTPConnectionPool(host='localhost', maxsize=1, block=True) @@ -108,6 +114,7 @@ class TestConnectionPool(unittest.TestCase): "Max retries exceeded with url: Test. " "(Caused by {0}: Test)".format(str(err.__class__))) + def test_pool_size(self): POOL_SIZE = 1 pool = HTTPConnectionPool(host='localhost', maxsize=POOL_SIZE, block=True) @@ -122,8 +129,8 @@ class TestConnectionPool(unittest.TestCase): self.assertEqual(pool.pool.qsize(), POOL_SIZE) #make sure that all of the exceptions return the connection to the pool - _test(Empty, TimeoutError) - _test(SocketTimeout, TimeoutError) + _test(Empty, ReadTimeoutError) + _test(SocketTimeout, ReadTimeoutError) _test(BaseSSLError, SSLError) _test(CertificateError, SSLError) @@ -166,6 +173,20 @@ class TestConnectionPool(unittest.TestCase): self.assertRaises(Empty, old_pool_queue.get, block=False) + def test_pool_timeouts(self): + pool = HTTPConnectionPool(host='localhost') + conn = pool._new_conn() + self.assertEqual(conn.__class__, HTTPConnection) + self.assertEqual(pool.timeout.__class__, Timeout) + self.assertEqual(pool.timeout._read, Timeout.DEFAULT_TIMEOUT) + self.assertEqual(pool.timeout._connect, Timeout.DEFAULT_TIMEOUT) + self.assertEqual(pool.timeout.total, None) + + pool = HTTPConnectionPool(host='localhost', timeout=3) + self.assertEqual(pool.timeout._read, 3) + self.assertEqual(pool.timeout._connect, 3) + self.assertEqual(pool.timeout.total, None) + if __name__ == '__main__': unittest.main() diff --git a/test/test_exceptions.py b/test/test_exceptions.py index 3e02ca6..e20649b 100644 --- a/test/test_exceptions.py +++ b/test/test_exceptions.py @@ -1,19 +1,35 @@ import unittest import pickle -from urllib3.exceptions import HTTPError, MaxRetryError, LocationParseError +from urllib3.exceptions import (HTTPError, MaxRetryError, LocationParseError, + ClosedPoolError, EmptyPoolError, + HostChangedError, ReadTimeoutError, + ConnectTimeoutError) from urllib3.connectionpool import HTTPConnectionPool class TestPickle(unittest.TestCase): + def cycle(self, item): + return pickle.loads(pickle.dumps(item)) + def test_exceptions(self): - assert pickle.dumps(HTTPError(None)) - assert pickle.dumps(MaxRetryError(None, None, None)) - assert pickle.dumps(LocationParseError(None)) + assert self.cycle(HTTPError(None)) + assert self.cycle(MaxRetryError(None, None, None)) + assert self.cycle(LocationParseError(None)) + assert self.cycle(ConnectTimeoutError(None)) def test_exceptions_with_objects(self): - assert pickle.dumps(HTTPError('foo')) - assert pickle.dumps(MaxRetryError(HTTPConnectionPool('localhost'), '/', None)) - assert pickle.dumps(LocationParseError('fake location')) + assert self.cycle(HTTPError('foo')) + assert self.cycle(MaxRetryError(HTTPConnectionPool('localhost'), + '/', None)) + assert self.cycle(LocationParseError('fake location')) + assert self.cycle(ClosedPoolError(HTTPConnectionPool('localhost'), + None)) + assert self.cycle(EmptyPoolError(HTTPConnectionPool('localhost'), + None)) + assert self.cycle(HostChangedError(HTTPConnectionPool('localhost'), + '/', None)) + assert self.cycle(ReadTimeoutError(HTTPConnectionPool('localhost'), + '/', None)) diff --git a/test/test_fields.py b/test/test_fields.py new file mode 100644 index 0000000..888c2d5 --- /dev/null +++ b/test/test_fields.py @@ -0,0 +1,44 @@ +import unittest + +from urllib3.fields import guess_content_type, RequestField +from urllib3.packages.six import b, u + + +class TestRequestField(unittest.TestCase): + + def test_guess_content_type(self): + self.assertEqual(guess_content_type('image.jpg'), 'image/jpeg') + self.assertEqual(guess_content_type('notsure'), 'application/octet-stream') + self.assertEqual(guess_content_type(None), 'application/octet-stream') + + def test_create(self): + simple_field = RequestField('somename', 'data') + self.assertEqual(simple_field.render_headers(), '\r\n') + filename_field = RequestField('somename', 'data', filename='somefile.txt') + self.assertEqual(filename_field.render_headers(), '\r\n') + headers_field = RequestField('somename', 'data', headers={'Content-Length': 4}) + self.assertEqual(headers_field.render_headers(), + 'Content-Length: 4\r\n' + '\r\n') + + def test_make_multipart(self): + field = RequestField('somename', 'data') + field.make_multipart(content_type='image/jpg', content_location='/test') + self.assertEqual(field.render_headers(), + 'Content-Disposition: form-data; name="somename"\r\n' + 'Content-Type: image/jpg\r\n' + 'Content-Location: /test\r\n' + '\r\n') + + def test_render_parts(self): + field = RequestField('somename', 'data') + parts = field._render_parts({'name': 'value', 'filename': 'value'}) + self.assertTrue('name="value"' in parts) + self.assertTrue('filename="value"' in parts) + parts = field._render_parts([('name', 'value'), ('filename', 'value')]) + self.assertEqual(parts, 'name="value"; filename="value"') + + def test_render_part(self): + field = RequestField('somename', 'data') + param = field._render_part('filename', u('n\u00e4me')) + self.assertEqual(param, "filename*=utf-8''n%C3%A4me") diff --git a/test/test_filepost.py b/test/test_filepost.py index 70ab100..ca33d61 100644 --- a/test/test_filepost.py +++ b/test/test_filepost.py @@ -1,6 +1,7 @@ import unittest from urllib3.filepost import encode_multipart_formdata, iter_fields +from urllib3.fields import RequestField from urllib3.packages.six import b, u @@ -117,3 +118,16 @@ class TestMultipartEncoding(unittest.TestCase): self.assertEqual(content_type, 'multipart/form-data; boundary=' + str(BOUNDARY)) + + def test_request_fields(self): + fields = [RequestField('k', b'v', filename='somefile.txt', headers={'Content-Type': 'image/jpeg'})] + + encoded, content_type = encode_multipart_formdata(fields, boundary=BOUNDARY) + + self.assertEquals(encoded, + b'--' + b(BOUNDARY) + b'\r\n' + b'Content-Type: image/jpeg\r\n' + b'\r\n' + b'v\r\n' + b'--' + b(BOUNDARY) + b'--\r\n' + ) diff --git a/test/test_proxymanager.py b/test/test_proxymanager.py index 64c86e8..e7b5c48 100644 --- a/test/test_proxymanager.py +++ b/test/test_proxymanager.py @@ -5,7 +5,7 @@ from urllib3.poolmanager import ProxyManager class TestProxyManager(unittest.TestCase): def test_proxy_headers(self): - p = ProxyManager(None) + p = ProxyManager('http://something:1234') url = 'http://pypi.python.org/test' # Verify default headers @@ -23,5 +23,21 @@ class TestProxyManager(unittest.TestCase): self.assertEqual(headers, provided_headers) + # Verify proxy with nonstandard port + provided_headers = {'Accept': 'application/json'} + expected_headers = provided_headers.copy() + expected_headers.update({'Host': 'pypi.python.org:8080'}) + url_with_port = 'http://pypi.python.org:8080/test' + headers = p._set_proxy_headers(url_with_port, provided_headers) + + self.assertEqual(headers, expected_headers) + + def test_default_port(self): + p = ProxyManager('http://something') + self.assertEqual(p.proxy.port, 80) + p = ProxyManager('https://something') + self.assertEqual(p.proxy.port, 443) + + if __name__ == '__main__': unittest.main() diff --git a/test/test_response.py b/test/test_response.py index 199e379..90d34eb 100644 --- a/test/test_response.py +++ b/test/test_response.py @@ -1,6 +1,6 @@ import unittest -from io import BytesIO +from io import BytesIO, BufferedReader from urllib3.response import HTTPResponse from urllib3.exceptions import DecodeError @@ -112,5 +112,138 @@ class TestResponse(unittest.TestCase): self.assertEqual(r.read(1), b'f') self.assertEqual(r.read(2), b'oo') + def test_io(self): + import socket + try: + from http.client import HTTPResponse as OldHTTPResponse + except: + from httplib import HTTPResponse as OldHTTPResponse + + fp = BytesIO(b'foo') + resp = HTTPResponse(fp, preload_content=False) + + self.assertEqual(resp.closed, False) + self.assertEqual(resp.readable(), True) + self.assertEqual(resp.writable(), False) + self.assertRaises(IOError, resp.fileno) + + resp.close() + self.assertEqual(resp.closed, True) + + # Try closing with an `httplib.HTTPResponse`, because it has an + # `isclosed` method. + hlr = OldHTTPResponse(socket.socket()) + resp2 = HTTPResponse(hlr, preload_content=False) + self.assertEqual(resp2.closed, False) + resp2.close() + self.assertEqual(resp2.closed, True) + + #also try when only data is present. + resp3 = HTTPResponse('foodata') + self.assertRaises(IOError, resp3.fileno) + + resp3._fp = 2 + # A corner case where _fp is present but doesn't have `closed`, + # `isclosed`, or `fileno`. Unlikely, but possible. + self.assertEqual(resp3.closed, True) + self.assertRaises(IOError, resp3.fileno) + + def test_io_bufferedreader(self): + fp = BytesIO(b'foo') + resp = HTTPResponse(fp, preload_content=False) + br = BufferedReader(resp) + + self.assertEqual(br.read(), b'foo') + + br.close() + self.assertEqual(resp.closed, True) + + def test_streaming(self): + fp = BytesIO(b'foo') + resp = HTTPResponse(fp, preload_content=False) + stream = resp.stream(2, decode_content=False) + + self.assertEqual(next(stream), b'fo') + self.assertEqual(next(stream), b'o') + self.assertRaises(StopIteration, next, stream) + + def test_gzipped_streaming(self): + import zlib + compress = zlib.compressobj(6, zlib.DEFLATED, 16 + zlib.MAX_WBITS) + data = compress.compress(b'foo') + data += compress.flush() + + fp = BytesIO(data) + resp = HTTPResponse(fp, headers={'content-encoding': 'gzip'}, + preload_content=False) + stream = resp.stream(2) + + self.assertEqual(next(stream), b'f') + self.assertEqual(next(stream), b'oo') + self.assertRaises(StopIteration, next, stream) + + def test_deflate_streaming(self): + import zlib + data = zlib.compress(b'foo') + + fp = BytesIO(data) + resp = HTTPResponse(fp, headers={'content-encoding': 'deflate'}, + preload_content=False) + stream = resp.stream(2) + + self.assertEqual(next(stream), b'f') + self.assertEqual(next(stream), b'oo') + self.assertRaises(StopIteration, next, stream) + + def test_deflate2_streaming(self): + import zlib + compress = zlib.compressobj(6, zlib.DEFLATED, -zlib.MAX_WBITS) + data = compress.compress(b'foo') + data += compress.flush() + + fp = BytesIO(data) + resp = HTTPResponse(fp, headers={'content-encoding': 'deflate'}, + preload_content=False) + stream = resp.stream(2) + + self.assertEqual(next(stream), b'f') + self.assertEqual(next(stream), b'oo') + self.assertRaises(StopIteration, next, stream) + + def test_empty_stream(self): + fp = BytesIO(b'') + resp = HTTPResponse(fp, preload_content=False) + stream = resp.stream(2, decode_content=False) + + self.assertRaises(StopIteration, next, stream) + + def test_mock_httpresponse_stream(self): + # Mock out a HTTP Request that does enough to make it through urllib3's + # read() and close() calls, and also exhausts and underlying file + # object. + class MockHTTPRequest(object): + self.fp = None + + def read(self, amt): + data = self.fp.read(amt) + if not data: + self.fp = None + + return data + + def close(self): + self.fp = None + + bio = BytesIO(b'foo') + fp = MockHTTPRequest() + fp.fp = bio + resp = HTTPResponse(fp, preload_content=False) + stream = resp.stream(2) + + self.assertEqual(next(stream), b'fo') + self.assertEqual(next(stream), b'o') + self.assertRaises(StopIteration, next, stream) + + if __name__ == '__main__': unittest.main() diff --git a/test/test_util.py b/test/test_util.py index a989da6..b465fef 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -1,10 +1,23 @@ -import unittest import logging +import unittest + +from mock import patch from urllib3 import add_stderr_logger -from urllib3.util import get_host, make_headers, split_first, parse_url, Url -from urllib3.exceptions import LocationParseError +from urllib3.util import ( + get_host, + make_headers, + split_first, + parse_url, + Timeout, + Url, +) +from urllib3.exceptions import LocationParseError, TimeoutStateError +# This number represents a time in seconds, it doesn't mean anything in +# isolation. Setting to a high-ish value to avoid conflicts with the smaller +# numbers used for timeouts +TIMEOUT_EPOCH = 1000 class TestUtil(unittest.TestCase): def test_get_host(self): @@ -34,20 +47,20 @@ class TestUtil(unittest.TestCase): 'http://173.194.35.7:80/test': ('http', '173.194.35.7', 80), # IPv6 - '[2a00:1450:4001:c01::67]': ('http', '2a00:1450:4001:c01::67', None), - 'http://[2a00:1450:4001:c01::67]': ('http', '2a00:1450:4001:c01::67', None), - 'http://[2a00:1450:4001:c01::67]/test': ('http', '2a00:1450:4001:c01::67', None), - 'http://[2a00:1450:4001:c01::67]:80': ('http', '2a00:1450:4001:c01::67', 80), - 'http://[2a00:1450:4001:c01::67]:80/test': ('http', '2a00:1450:4001:c01::67', 80), + '[2a00:1450:4001:c01::67]': ('http', '[2a00:1450:4001:c01::67]', None), + 'http://[2a00:1450:4001:c01::67]': ('http', '[2a00:1450:4001:c01::67]', None), + 'http://[2a00:1450:4001:c01::67]/test': ('http', '[2a00:1450:4001:c01::67]', None), + 'http://[2a00:1450:4001:c01::67]:80': ('http', '[2a00:1450:4001:c01::67]', 80), + 'http://[2a00:1450:4001:c01::67]:80/test': ('http', '[2a00:1450:4001:c01::67]', 80), # More IPv6 from http://www.ietf.org/rfc/rfc2732.txt - 'http://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:8000/index.html': ('http', 'FEDC:BA98:7654:3210:FEDC:BA98:7654:3210', 8000), - 'http://[1080:0:0:0:8:800:200C:417A]/index.html': ('http', '1080:0:0:0:8:800:200C:417A', None), - 'http://[3ffe:2a00:100:7031::1]': ('http', '3ffe:2a00:100:7031::1', None), - 'http://[1080::8:800:200C:417A]/foo': ('http', '1080::8:800:200C:417A', None), - 'http://[::192.9.5.5]/ipng': ('http', '::192.9.5.5', None), - 'http://[::FFFF:129.144.52.38]:42/index.html': ('http', '::FFFF:129.144.52.38', 42), - 'http://[2010:836B:4179::836B:4179]': ('http', '2010:836B:4179::836B:4179', None), + 'http://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:8000/index.html': ('http', '[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]', 8000), + 'http://[1080:0:0:0:8:800:200C:417A]/index.html': ('http', '[1080:0:0:0:8:800:200C:417A]', None), + 'http://[3ffe:2a00:100:7031::1]': ('http', '[3ffe:2a00:100:7031::1]', None), + 'http://[1080::8:800:200C:417A]/foo': ('http', '[1080::8:800:200C:417A]', None), + 'http://[::192.9.5.5]/ipng': ('http', '[::192.9.5.5]', None), + 'http://[::FFFF:129.144.52.38]:42/index.html': ('http', '[::FFFF:129.144.52.38]', 42), + 'http://[2010:836B:4179::836B:4179]': ('http', '[2010:836B:4179::836B:4179]', None), } for url, expected_host in url_host_map.items(): returned_host = get_host(url) @@ -57,6 +70,8 @@ class TestUtil(unittest.TestCase): # TODO: Add more tests invalid_host = [ 'http://google.com:foo', + 'http://::1/', + 'http://::1:80/', ] for location in invalid_host: @@ -83,6 +98,9 @@ class TestUtil(unittest.TestCase): returned_url = parse_url(url) self.assertEquals(returned_url, expected_url) + def test_parse_url_invalid_IPv6(self): + self.assertRaises(ValueError, parse_url, '[::1') + def test_request_uri(self): url_host_map = { 'http://google.com/mail': '/mail', @@ -99,6 +117,17 @@ class TestUtil(unittest.TestCase): returned_url = parse_url(url) self.assertEquals(returned_url.request_uri, expected_request_uri) + def test_netloc(self): + url_netloc_map = { + 'http://google.com/mail': 'google.com', + 'http://google.com:80/mail': 'google.com:80', + 'google.com/foobar': 'google.com', + 'google.com:12345': 'google.com:12345', + } + + for url, expected_netloc in url_netloc_map.items(): + self.assertEquals(parse_url(url).netloc, expected_netloc) + def test_make_headers(self): self.assertEqual( make_headers(accept_encoding=True), @@ -148,3 +177,99 @@ class TestUtil(unittest.TestCase): logger.debug('Testing add_stderr_logger') logger.removeHandler(handler) + + def _make_time_pass(self, seconds, timeout, time_mock): + """ Make some time pass for the timeout object """ + time_mock.return_value = TIMEOUT_EPOCH + timeout.start_connect() + time_mock.return_value = TIMEOUT_EPOCH + seconds + return timeout + + def test_invalid_timeouts(self): + try: + Timeout(total=-1) + self.fail("negative value should throw exception") + except ValueError as e: + self.assertTrue('less than' in str(e)) + try: + Timeout(connect=2, total=-1) + self.fail("negative value should throw exception") + except ValueError as e: + self.assertTrue('less than' in str(e)) + + try: + Timeout(read=-1) + self.fail("negative value should throw exception") + except ValueError as e: + self.assertTrue('less than' in str(e)) + + # Booleans are allowed also by socket.settimeout and converted to the + # equivalent float (1.0 for True, 0.0 for False) + Timeout(connect=False, read=True) + + try: + Timeout(read="foo") + self.fail("string value should not be allowed") + except ValueError as e: + self.assertTrue('int or float' in str(e)) + + + @patch('urllib3.util.current_time') + def test_timeout(self, current_time): + timeout = Timeout(total=3) + + # make 'no time' elapse + timeout = self._make_time_pass(seconds=0, timeout=timeout, + time_mock=current_time) + self.assertEqual(timeout.read_timeout, 3) + self.assertEqual(timeout.connect_timeout, 3) + + timeout = Timeout(total=3, connect=2) + self.assertEqual(timeout.connect_timeout, 2) + + timeout = Timeout() + self.assertEqual(timeout.connect_timeout, Timeout.DEFAULT_TIMEOUT) + + # Connect takes 5 seconds, leaving 5 seconds for read + timeout = Timeout(total=10, read=7) + timeout = self._make_time_pass(seconds=5, timeout=timeout, + time_mock=current_time) + self.assertEqual(timeout.read_timeout, 5) + + # Connect takes 2 seconds, read timeout still 7 seconds + timeout = Timeout(total=10, read=7) + timeout = self._make_time_pass(seconds=2, timeout=timeout, + time_mock=current_time) + self.assertEqual(timeout.read_timeout, 7) + + timeout = Timeout(total=10, read=7) + self.assertEqual(timeout.read_timeout, 7) + + timeout = Timeout(total=None, read=None, connect=None) + self.assertEqual(timeout.connect_timeout, None) + self.assertEqual(timeout.read_timeout, None) + self.assertEqual(timeout.total, None) + + + def test_timeout_str(self): + timeout = Timeout(connect=1, read=2, total=3) + self.assertEqual(str(timeout), "Timeout(connect=1, read=2, total=3)") + timeout = Timeout(connect=1, read=None, total=3) + self.assertEqual(str(timeout), "Timeout(connect=1, read=None, total=3)") + + + @patch('urllib3.util.current_time') + def test_timeout_elapsed(self, current_time): + current_time.return_value = TIMEOUT_EPOCH + timeout = Timeout(total=3) + self.assertRaises(TimeoutStateError, timeout.get_connect_duration) + + timeout.start_connect() + self.assertRaises(TimeoutStateError, timeout.start_connect) + + current_time.return_value = TIMEOUT_EPOCH + 2 + self.assertEqual(timeout.get_connect_duration(), 2) + current_time.return_value = TIMEOUT_EPOCH + 37 + self.assertEqual(timeout.get_connect_duration(), 37) + + diff --git a/urllib3.egg-info/PKG-INFO b/urllib3.egg-info/PKG-INFO index 661e33b..a81ab9c 100644 --- a/urllib3.egg-info/PKG-INFO +++ b/urllib3.egg-info/PKG-INFO @@ -1,6 +1,6 @@ -Metadata-Version: 1.1 +Metadata-Version: 1.0 Name: urllib3 -Version: 1.6 +Version: 1.7.1 Summary: HTTP library with thread-safe connection pooling, file post, and more. Home-page: http://urllib3.readthedocs.org/ Author: Andrey Petrov @@ -25,7 +25,7 @@ Description: ======= - Supports gzip and deflate decoding. - Thread-safe and sanity-safe. - Works with AppEngine, gevent, and eventlib. - - Tested on Python 2.6+ and Python 3.3+, 100% unit test coverage. + - Tested on Python 2.6+ and Python 3.2+, 100% unit test coverage. - Small and easy to understand codebase perfect for extending and building upon. For a more comprehensive solution, have a look at `Requests `_ which is also powered by urllib3. @@ -89,14 +89,18 @@ Description: ======= Run the tests ============= - We use some external dependencies to run the urllib3 test suite. Easiest way to - run the tests is thusly from the urllib3 source root: :: + We use some external dependencies, multiple interpreters and code coverage + analysis while running test suite. Easiest way to run the tests is thusly the + ``tox`` utility: :: - $ pip install -r test-requirements.txt - $ nosetests - ..................................................... + $ tox + # [..] + py26: commands succeeded + py27: commands succeeded + py32: commands succeeded + py33: commands succeeded - Success! You could also ``pip install coverage`` to get code coverage reporting. + Note that code coverage less than 100% is regarded as a failing run. Contributing @@ -117,6 +121,54 @@ Description: ======= Changes ======= + 1.7.1 (2013-09-25) + ++++++++++++++++++ + + * Added granular timeout support with new `urllib3.util.Timeout` class. + (Issue #231) + + * Fixed Python 3.4 support. (Issue #238) + + + 1.7 (2013-08-14) + ++++++++++++++++ + + * More exceptions are now pickle-able, with tests. (Issue #174) + + * Fixed redirecting with relative URLs in Location header. (Issue #178) + + * Support for relative urls in ``Location: ...`` header. (Issue #179) + + * ``urllib3.response.HTTPResponse`` now inherits from ``io.IOBase`` for bonus + file-like functionality. (Issue #187) + + * Passing ``assert_hostname=False`` when creating a HTTPSConnectionPool will + skip hostname verification for SSL connections. (Issue #194) + + * New method ``urllib3.response.HTTPResponse.stream(...)`` which acts as a + generator wrapped around ``.read(...)``. (Issue #198) + + * IPv6 url parsing enforces brackets around the hostname. (Issue #199) + + * Fixed thread race condition in + ``urllib3.poolmanager.PoolManager.connection_from_host(...)`` (Issue #204) + + * ``ProxyManager`` requests now include non-default port in ``Host: ...`` + header. (Issue #217) + + * Added HTTPS proxy support in ``ProxyManager``. (Issue #170 #139) + + * New ``RequestField`` object can be passed to the ``fields=...`` param which + can specify headers. (Issue #220) + + * Raise ``urllib3.exceptions.ProxyError`` when connecting to proxy fails. + (Issue #221) + + * Use international headers when posting file names. (Issue #119) + + * Improved IPv6 support. (Issue #203) + + 1.6 (2013-04-25) ++++++++++++++++ diff --git a/urllib3.egg-info/SOURCES.txt b/urllib3.egg-info/SOURCES.txt index 69ec475..32759d9 100644 --- a/urllib3.egg-info/SOURCES.txt +++ b/urllib3.egg-info/SOURCES.txt @@ -8,11 +8,15 @@ setup.py test-requirements.txt dummyserver/__init__.py dummyserver/handlers.py +dummyserver/proxy.py dummyserver/server.py dummyserver/testcase.py +test/__init__.py +test/benchmark.py test/test_collections.py test/test_connectionpool.py test/test_exceptions.py +test/test_fields.py test/test_filepost.py test/test_poolmanager.py test/test_proxymanager.py @@ -22,6 +26,7 @@ urllib3/__init__.py urllib3/_collections.py urllib3/connectionpool.py urllib3/exceptions.py +urllib3/fields.py urllib3/filepost.py urllib3/poolmanager.py urllib3/request.py diff --git a/urllib3/__init__.py b/urllib3/__init__.py index ebd43b3..eed7006 100644 --- a/urllib3/__init__.py +++ b/urllib3/__init__.py @@ -10,7 +10,7 @@ urllib3 - Thread-safe connection pooling and re-using. __author__ = 'Andrey Petrov (andrey.petrov@shazow.net)' __license__ = 'MIT' -__version__ = '1.6' +__version__ = '1.7.1' from .connectionpool import ( @@ -23,7 +23,7 @@ from . import exceptions from .filepost import encode_multipart_formdata from .poolmanager import PoolManager, ProxyManager, proxy_from_url from .response import HTTPResponse -from .util import make_headers, get_host +from .util import make_headers, get_host, Timeout # Set default logging handler to avoid "No handler found" warnings. diff --git a/urllib3/_collections.py b/urllib3/_collections.py index b35a736..282b8d5 100644 --- a/urllib3/_collections.py +++ b/urllib3/_collections.py @@ -5,7 +5,7 @@ # the MIT License: http://www.opensource.org/licenses/mit-license.php from collections import MutableMapping -from threading import Lock +from threading import RLock try: # Python 2.7+ from collections import OrderedDict @@ -40,18 +40,18 @@ class RecentlyUsedContainer(MutableMapping): self.dispose_func = dispose_func self._container = self.ContainerCls() - self._lock = Lock() + self.lock = RLock() def __getitem__(self, key): # Re-insert the item, moving it to the end of the eviction line. - with self._lock: + with self.lock: item = self._container.pop(key) self._container[key] = item return item def __setitem__(self, key, value): evicted_value = _Null - with self._lock: + with self.lock: # Possibly evict the existing value of 'key' evicted_value = self._container.get(key, _Null) self._container[key] = value @@ -65,21 +65,21 @@ class RecentlyUsedContainer(MutableMapping): self.dispose_func(evicted_value) def __delitem__(self, key): - with self._lock: + with self.lock: value = self._container.pop(key) if self.dispose_func: self.dispose_func(value) def __len__(self): - with self._lock: + with self.lock: return len(self._container) def __iter__(self): raise NotImplementedError('Iteration over this class is unlikely to be threadsafe.') def clear(self): - with self._lock: + with self.lock: # Copy pointers to all values, then wipe the mapping # under Python 2, this copies the list of values twice :-| values = list(self._container.values()) @@ -90,5 +90,5 @@ class RecentlyUsedContainer(MutableMapping): self.dispose_func(value) def keys(self): - with self._lock: + with self.lock: return self._container.keys() diff --git a/urllib3/connectionpool.py b/urllib3/connectionpool.py index 73fa9ca..691d4e2 100644 --- a/urllib3/connectionpool.py +++ b/urllib3/connectionpool.py @@ -4,12 +4,11 @@ # This module is part of urllib3 and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php -import logging -import socket import errno +import logging from socket import error as SocketError, timeout as SocketTimeout -from .util import resolve_cert_reqs, resolve_ssl_version, assert_fingerprint +import socket try: # Python 3 from http.client import HTTPConnection, HTTPException @@ -22,11 +21,15 @@ try: # Python 3 from queue import LifoQueue, Empty, Full except ImportError: from Queue import LifoQueue, Empty, Full + import Queue as _ # Platform-specific: Windows try: # Compiled with SSL? HTTPSConnection = object - BaseSSLError = None + + class BaseSSLError(BaseException): + pass + ssl = None try: # Python 3 @@ -41,21 +44,29 @@ except (ImportError, AttributeError): # Platform-specific: No SSL. pass -from .request import RequestMethods -from .response import HTTPResponse -from .util import get_host, is_connection_dropped, ssl_wrap_socket from .exceptions import ( ClosedPoolError, + ConnectTimeoutError, EmptyPoolError, HostChangedError, MaxRetryError, SSLError, - TimeoutError, + ReadTimeoutError, + ProxyError, ) - -from .packages.ssl_match_hostname import match_hostname, CertificateError +from .packages.ssl_match_hostname import CertificateError, match_hostname from .packages import six - +from .request import RequestMethods +from .response import HTTPResponse +from .util import ( + assert_fingerprint, + get_host, + is_connection_dropped, + resolve_cert_reqs, + resolve_ssl_version, + ssl_wrap_socket, + Timeout, +) xrange = six.moves.xrange @@ -93,11 +104,24 @@ class VerifiedHTTPSConnection(HTTPSConnection): def connect(self): # Add certificate verification - sock = socket.create_connection((self.host, self.port), self.timeout) + try: + sock = socket.create_connection( + address=(self.host, self.port), + timeout=self.timeout) + except SocketTimeout: + raise ConnectTimeoutError( + self, "Connection to %s timed out. (connect timeout=%s)" % + (self.host, self.timeout)) resolved_cert_reqs = resolve_cert_reqs(self.cert_reqs) resolved_ssl_version = resolve_ssl_version(self.ssl_version) + if self._tunnel_host: + self.sock = sock + # Calls self._set_hostport(), so self.host is + # self._tunnel_host below. + self._tunnel() + # Wrap socket using verification with the root certs in # trusted_root_certs self.sock = ssl_wrap_socket(sock, self.key_file, self.cert_file, @@ -110,10 +134,11 @@ class VerifiedHTTPSConnection(HTTPSConnection): if self.assert_fingerprint: assert_fingerprint(self.sock.getpeercert(binary_form=True), self.assert_fingerprint) - else: + elif self.assert_hostname is not False: match_hostname(self.sock.getpeercert(), self.assert_hostname or self.host) + ## Pool objects class ConnectionPool(object): @@ -126,6 +151,9 @@ class ConnectionPool(object): QueueCls = LifoQueue def __init__(self, host, port=None): + # httplib doesn't like it when we include brackets in ipv6 addresses + host = host.strip('[]') + self.host = host self.port = port @@ -133,6 +161,8 @@ class ConnectionPool(object): return '%s(host=%r, port=%r)' % (type(self).__name__, self.host, self.port) +# This is taken from http://hg.python.org/cpython/file/7aaba721ebc0/Lib/socket.py#l252 +_blocking_errnos = set([errno.EAGAIN, errno.EWOULDBLOCK]) class HTTPConnectionPool(ConnectionPool, RequestMethods): """ @@ -151,9 +181,15 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): as a valid HTTP/1.0 or 1.1 status line, passed into :class:`httplib.HTTPConnection`. + .. note:: + Only works in Python 2. This parameter is ignored in Python 3. + :param timeout: - Socket timeout for each individual connection, can be a float. None - disables timeout. + Socket timeout in seconds for each individual connection. This can + be a float or integer, which sets the timeout for the HTTP request, + or an instance of :class:`urllib3.util.Timeout` which gives you more + fine-grained control over request timeouts. After the constructor has + been parsed, this is always a `urllib3.util.Timeout` object. :param maxsize: Number of connections to save that can be reused. More than 1 is useful @@ -171,20 +207,39 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): :param headers: Headers to include with all requests, unless other headers are given explicitly. + + :param _proxy: + Parsed proxy URL, should not be used directly, instead, see + :class:`urllib3.connectionpool.ProxyManager`" + + :param _proxy_headers: + A dictionary with proxy headers, should not be used directly, + instead, see :class:`urllib3.connectionpool.ProxyManager`" """ scheme = 'http' - def __init__(self, host, port=None, strict=False, timeout=None, maxsize=1, - block=False, headers=None): + def __init__(self, host, port=None, strict=False, + timeout=Timeout.DEFAULT_TIMEOUT, maxsize=1, block=False, + headers=None, _proxy=None, _proxy_headers=None): ConnectionPool.__init__(self, host, port) RequestMethods.__init__(self, headers) self.strict = strict + + # This is for backwards compatibility and can be removed once a timeout + # can only be set to a Timeout object + if not isinstance(timeout, Timeout): + timeout = Timeout.from_float(timeout) + self.timeout = timeout + self.pool = self.QueueCls(maxsize) self.block = block + self.proxy = _proxy + self.proxy_headers = _proxy_headers or {} + # Fill the queue up so that doing get() on it will block properly for _ in xrange(maxsize): self.pool.put(None) @@ -200,9 +255,14 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): self.num_connections += 1 log.info("Starting new HTTP connection (%d): %s" % (self.num_connections, self.host)) - return HTTPConnection(host=self.host, - port=self.port, - strict=self.strict) + extra_params = {} + if not six.PY3: # Python 2 + extra_params['strict'] = self.strict + + return HTTPConnection(host=self.host, port=self.port, + timeout=self.timeout.connect_timeout, + **extra_params) + def _get_conn(self, timeout=None): """ @@ -263,31 +323,89 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): % self.host) # Connection never got put back into the pool, close it. - conn.close() + if conn: + conn.close() + + def _get_timeout(self, timeout): + """ Helper that always returns a :class:`urllib3.util.Timeout` """ + if timeout is _Default: + return self.timeout.clone() + + if isinstance(timeout, Timeout): + return timeout.clone() + else: + # User passed us an int/float. This is for backwards compatibility, + # can be removed later + return Timeout.from_float(timeout) def _make_request(self, conn, method, url, timeout=_Default, **httplib_request_kw): """ Perform a request on a given httplib connection object taken from our pool. + + :param conn: + a connection from one of our connection pools + + :param timeout: + Socket timeout in seconds for the request. This can be a + float or integer, which will set the same timeout value for + the socket connect and the socket read, or an instance of + :class:`urllib3.util.Timeout`, which gives you more fine-grained + control over your timeouts. """ self.num_requests += 1 - if timeout is _Default: - timeout = self.timeout - - conn.timeout = timeout # This only does anything in Py26+ - conn.request(method, url, **httplib_request_kw) + timeout_obj = self._get_timeout(timeout) - # Set timeout - sock = getattr(conn, 'sock', False) # AppEngine doesn't have sock attr. - if sock: - sock.settimeout(timeout) + try: + timeout_obj.start_connect() + conn.timeout = timeout_obj.connect_timeout + # conn.request() calls httplib.*.request, not the method in + # request.py. It also calls makefile (recv) on the socket + conn.request(method, url, **httplib_request_kw) + except SocketTimeout: + raise ConnectTimeoutError( + self, "Connection to %s timed out. (connect timeout=%s)" % + (self.host, timeout_obj.connect_timeout)) + + # Reset the timeout for the recv() on the socket + read_timeout = timeout_obj.read_timeout + log.debug("Setting read timeout to %s" % read_timeout) + # App Engine doesn't have a sock attr + if hasattr(conn, 'sock') and \ + read_timeout is not None and \ + read_timeout is not Timeout.DEFAULT_TIMEOUT: + # In Python 3 socket.py will catch EAGAIN and return None when you + # try and read into the file pointer created by http.client, which + # instead raises a BadStatusLine exception. Instead of catching + # the exception and assuming all BadStatusLine exceptions are read + # timeouts, check for a zero timeout before making the request. + if read_timeout == 0: + raise ReadTimeoutError( + self, url, + "Read timed out. (read timeout=%s)" % read_timeout) + conn.sock.settimeout(read_timeout) + + # Receive the response from the server + try: + try: # Python 2.7+, use buffering of HTTP responses + httplib_response = conn.getresponse(buffering=True) + except TypeError: # Python 2.6 and older + httplib_response = conn.getresponse() + except SocketTimeout: + raise ReadTimeoutError( + self, url, "Read timed out. (read timeout=%s)" % read_timeout) + + except SocketError as e: # Platform-specific: Python 2 + # See the above comment about EAGAIN in Python 3. In Python 2 we + # have to specifically catch it and throw the timeout error + if e.errno in _blocking_errnos: + raise ReadTimeoutError( + self, url, + "Read timed out. (read timeout=%s)" % read_timeout) + raise - try: # Python 2.7+, use buffering of HTTP responses - httplib_response = conn.getresponse(buffering=True) - except TypeError: # Python 2.6 and older - httplib_response = conn.getresponse() # AppEngine doesn't have a version attr. http_version = getattr(conn, '_http_vsn_str', 'HTTP/?') @@ -367,7 +485,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): :param redirect: If True, automatically handle redirects (status codes 301, 302, - 303, 307). Each redirect counts as a retry. + 303, 307, 308). Each redirect counts as a retry. :param assert_same_host: If ``True``, will make sure that the host of the pool requests is @@ -375,7 +493,9 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): use the pool on an HTTP proxy and request foreign hosts. :param timeout: - If specified, overrides the default timeout for this one request. + If specified, overrides the default timeout for this one + request. It may be a float (in seconds) or an instance of + :class:`urllib3.util.Timeout`. :param pool_timeout: If set and the pool is set to block=True, then this method will @@ -402,18 +522,11 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): if retries < 0: raise MaxRetryError(self, url) - if timeout is _Default: - timeout = self.timeout - if release_conn is None: release_conn = response_kw.get('preload_content', True) # Check host if assert_same_host and not self.is_same_host(url): - host = "%s://%s" % (self.scheme, self.host) - if self.port: - host = "%s:%d" % (host, self.port) - raise HostChangedError(self, url, retries - 1) conn = None @@ -444,18 +557,20 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): # ``response.release_conn()`` is called (implicitly by # ``response.read()``) - except Empty as e: + except Empty: # Timed out by queue - raise TimeoutError(self, "Request timed out. (pool_timeout=%s)" % - pool_timeout) + raise ReadTimeoutError( + self, url, "Read timed out, no pool connections are available.") - except SocketTimeout as e: + except SocketTimeout: # Timed out by socket - raise TimeoutError(self, "Request timed out. (timeout=%s)" % - timeout) + raise ReadTimeoutError(self, url, "Read timed out.") except BaseSSLError as e: # SSL certificate error + if 'timed out' in str(e) or \ + 'did not complete (read)' in str(e): # Platform-specific: Python 2.6 + raise ReadTimeoutError(self, url, "Read timed out.") raise SSLError(e) except CertificateError as e: @@ -463,6 +578,10 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): raise SSLError(e) except (HTTPException, SocketError) as e: + if isinstance(e, SocketError) and self.proxy is not None: + raise ProxyError('Cannot connect to proxy. ' + 'Socket error: %s.' % e) + # Connection broken, discard. It will be replaced next _get_conn(). conn = None # This is necessary so we can access e below @@ -511,6 +630,7 @@ class HTTPSConnectionPool(HTTPConnectionPool): :class:`.VerifiedHTTPSConnection` uses one of ``assert_fingerprint``, ``assert_hostname`` and ``host`` in this order to verify connections. + If ``assert_hostname`` is False, no verification is done. The ``key_file``, ``cert_file``, ``cert_reqs``, ``ca_certs`` and ``ssl_version`` are only used if :mod:`ssl` is available and are fed into @@ -523,13 +643,13 @@ class HTTPSConnectionPool(HTTPConnectionPool): def __init__(self, host, port=None, strict=False, timeout=None, maxsize=1, block=False, headers=None, + _proxy=None, _proxy_headers=None, key_file=None, cert_file=None, cert_reqs=None, ca_certs=None, ssl_version=None, assert_hostname=None, assert_fingerprint=None): - HTTPConnectionPool.__init__(self, host, port, - strict, timeout, maxsize, - block, headers) + HTTPConnectionPool.__init__(self, host, port, strict, timeout, maxsize, + block, headers, _proxy, _proxy_headers) self.key_file = key_file self.cert_file = cert_file self.cert_reqs = cert_reqs @@ -538,6 +658,34 @@ class HTTPSConnectionPool(HTTPConnectionPool): self.assert_hostname = assert_hostname self.assert_fingerprint = assert_fingerprint + def _prepare_conn(self, connection): + """ + Prepare the ``connection`` for :meth:`urllib3.util.ssl_wrap_socket` + and establish the tunnel if proxy is used. + """ + + if isinstance(connection, VerifiedHTTPSConnection): + connection.set_cert(key_file=self.key_file, + cert_file=self.cert_file, + cert_reqs=self.cert_reqs, + ca_certs=self.ca_certs, + assert_hostname=self.assert_hostname, + assert_fingerprint=self.assert_fingerprint) + connection.ssl_version = self.ssl_version + + if self.proxy is not None: + # Python 2.7+ + try: + set_tunnel = connection.set_tunnel + except AttributeError: # Platform-specific: Python 2.6 + set_tunnel = connection._set_tunnel + set_tunnel(self.host, self.port, self.proxy_headers) + # Establish tunnel connection early, because otherwise httplib + # would improperly set Host: header to proxy's IP:port. + connection.connect() + + return connection + def _new_conn(self): """ Return a fresh :class:`httplib.HTTPSConnection`. @@ -546,26 +694,28 @@ class HTTPSConnectionPool(HTTPConnectionPool): log.info("Starting new HTTPS connection (%d): %s" % (self.num_connections, self.host)) + actual_host = self.host + actual_port = self.port + if self.proxy is not None: + actual_host = self.proxy.host + actual_port = self.proxy.port + if not ssl: # Platform-specific: Python compiled without +ssl if not HTTPSConnection or HTTPSConnection is object: raise SSLError("Can't connect to HTTPS URL because the SSL " "module is not available.") - - return HTTPSConnection(host=self.host, - port=self.port, - strict=self.strict) - - connection = VerifiedHTTPSConnection(host=self.host, - port=self.port, - strict=self.strict) - connection.set_cert(key_file=self.key_file, cert_file=self.cert_file, - cert_reqs=self.cert_reqs, ca_certs=self.ca_certs, - assert_hostname=self.assert_hostname, - assert_fingerprint=self.assert_fingerprint) - - connection.ssl_version = self.ssl_version - - return connection + connection_class = HTTPSConnection + else: + connection_class = VerifiedHTTPSConnection + + extra_params = {} + if not six.PY3: # Python 2 + extra_params['strict'] = self.strict + connection = connection_class(host=actual_host, port=actual_port, + timeout=self.timeout.connect_timeout, + **extra_params) + + return self._prepare_conn(connection) def connection_from_url(url, **kw): diff --git a/urllib3/contrib/ntlmpool.py b/urllib3/contrib/ntlmpool.py index 277ee0b..b8cd933 100644 --- a/urllib3/contrib/ntlmpool.py +++ b/urllib3/contrib/ntlmpool.py @@ -33,7 +33,7 @@ class NTLMConnectionPool(HTTPSConnectionPool): def __init__(self, user, pw, authurl, *args, **kwargs): """ authurl is a random URL on the server that is protected by NTLM. - user is the Windows user, probably in the DOMAIN\username format. + user is the Windows user, probably in the DOMAIN\\username format. pw is the password for the user. """ super(NTLMConnectionPool, self).__init__(*args, **kwargs) diff --git a/urllib3/contrib/pyopenssl.py b/urllib3/contrib/pyopenssl.py index 5c4c6d8..d43bcd6 100644 --- a/urllib3/contrib/pyopenssl.py +++ b/urllib3/contrib/pyopenssl.py @@ -20,13 +20,13 @@ Now you can use :mod:`urllib3` as you normally would, and it will support SNI when the required modules are installed. ''' -from ndg.httpsclient.ssl_peer_verification import (ServerSSLCertVerification, - SUBJ_ALT_NAME_SUPPORT) +from ndg.httpsclient.ssl_peer_verification import SUBJ_ALT_NAME_SUPPORT from ndg.httpsclient.subj_alt_name import SubjectAltName import OpenSSL.SSL from pyasn1.codec.der import decoder as der_decoder from socket import _fileobject import ssl +from cStringIO import StringIO from .. import connectionpool from .. import util @@ -99,6 +99,172 @@ def get_subj_alt_name(peer_cert): return dns_name +class fileobject(_fileobject): + + def read(self, size=-1): + # Use max, disallow tiny reads in a loop as they are very inefficient. + # We never leave read() with any leftover data from a new recv() call + # in our internal buffer. + rbufsize = max(self._rbufsize, self.default_bufsize) + # Our use of StringIO rather than lists of string objects returned by + # recv() minimizes memory usage and fragmentation that occurs when + # rbufsize is large compared to the typical return value of recv(). + buf = self._rbuf + buf.seek(0, 2) # seek end + if size < 0: + # Read until EOF + self._rbuf = StringIO() # reset _rbuf. we consume it via buf. + while True: + try: + data = self._sock.recv(rbufsize) + except OpenSSL.SSL.WantReadError: + continue + if not data: + break + buf.write(data) + return buf.getvalue() + else: + # Read until size bytes or EOF seen, whichever comes first + buf_len = buf.tell() + if buf_len >= size: + # Already have size bytes in our buffer? Extract and return. + buf.seek(0) + rv = buf.read(size) + self._rbuf = StringIO() + self._rbuf.write(buf.read()) + return rv + + self._rbuf = StringIO() # reset _rbuf. we consume it via buf. + while True: + left = size - buf_len + # recv() will malloc the amount of memory given as its + # parameter even though it often returns much less data + # than that. The returned data string is short lived + # as we copy it into a StringIO and free it. This avoids + # fragmentation issues on many platforms. + try: + data = self._sock.recv(left) + except OpenSSL.SSL.WantReadError: + continue + if not data: + break + n = len(data) + if n == size and not buf_len: + # Shortcut. Avoid buffer data copies when: + # - We have no data in our buffer. + # AND + # - Our call to recv returned exactly the + # number of bytes we were asked to read. + return data + if n == left: + buf.write(data) + del data # explicit free + break + assert n <= left, "recv(%d) returned %d bytes" % (left, n) + buf.write(data) + buf_len += n + del data # explicit free + #assert buf_len == buf.tell() + return buf.getvalue() + + def readline(self, size=-1): + buf = self._rbuf + buf.seek(0, 2) # seek end + if buf.tell() > 0: + # check if we already have it in our buffer + buf.seek(0) + bline = buf.readline(size) + if bline.endswith('\n') or len(bline) == size: + self._rbuf = StringIO() + self._rbuf.write(buf.read()) + return bline + del bline + if size < 0: + # Read until \n or EOF, whichever comes first + if self._rbufsize <= 1: + # Speed up unbuffered case + buf.seek(0) + buffers = [buf.read()] + self._rbuf = StringIO() # reset _rbuf. we consume it via buf. + data = None + recv = self._sock.recv + while True: + try: + while data != "\n": + data = recv(1) + if not data: + break + buffers.append(data) + except OpenSSL.SSL.WantReadError: + continue + break + return "".join(buffers) + + buf.seek(0, 2) # seek end + self._rbuf = StringIO() # reset _rbuf. we consume it via buf. + while True: + try: + data = self._sock.recv(self._rbufsize) + except OpenSSL.SSL.WantReadError: + continue + if not data: + break + nl = data.find('\n') + if nl >= 0: + nl += 1 + buf.write(data[:nl]) + self._rbuf.write(data[nl:]) + del data + break + buf.write(data) + return buf.getvalue() + else: + # Read until size bytes or \n or EOF seen, whichever comes first + buf.seek(0, 2) # seek end + buf_len = buf.tell() + if buf_len >= size: + buf.seek(0) + rv = buf.read(size) + self._rbuf = StringIO() + self._rbuf.write(buf.read()) + return rv + self._rbuf = StringIO() # reset _rbuf. we consume it via buf. + while True: + try: + data = self._sock.recv(self._rbufsize) + except OpenSSL.SSL.WantReadError: + continue + if not data: + break + left = size - buf_len + # did we just receive a newline? + nl = data.find('\n', 0, left) + if nl >= 0: + nl += 1 + # save the excess data to _rbuf + self._rbuf.write(data[nl:]) + if buf_len: + buf.write(data[:nl]) + break + else: + # Shortcut. Avoid data copy through buf when returning + # a substring of our first recv(). + return data[:nl] + n = len(data) + if n == size and not buf_len: + # Shortcut. Avoid data copy through buf when + # returning exactly all of our first recv(). + return data + if n >= left: + buf.write(data[:left]) + self._rbuf.write(data[left:]) + break + buf.write(data) + buf_len += n + #assert buf_len == buf.tell() + return buf.getvalue() + + class WrappedSocket(object): '''API-compatibility wrapper for Python OpenSSL's Connection-class.''' @@ -106,8 +272,11 @@ class WrappedSocket(object): self.connection = connection self.socket = socket + def fileno(self): + return self.socket.fileno() + def makefile(self, mode, bufsize=-1): - return _fileobject(self.connection, mode, bufsize) + return fileobject(self.connection, mode, bufsize) def settimeout(self, timeout): return self.socket.settimeout(timeout) @@ -115,10 +284,14 @@ class WrappedSocket(object): def sendall(self, data): return self.connection.sendall(data) + def close(self): + return self.connection.shutdown() + def getpeercert(self, binary_form=False): x509 = self.connection.get_peer_certificate() + if not x509: - raise ssl.SSLError('') + return x509 if binary_form: return OpenSSL.crypto.dump_certificate( @@ -159,9 +332,13 @@ def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, cnx = OpenSSL.SSL.Connection(ctx, sock) cnx.set_tlsext_host_name(server_hostname) cnx.set_connect_state() - try: - cnx.do_handshake() - except OpenSSL.SSL.Error as e: - raise ssl.SSLError('bad handshake', e) + while True: + try: + cnx.do_handshake() + except OpenSSL.SSL.WantReadError: + continue + except OpenSSL.SSL.Error as e: + raise ssl.SSLError('bad handshake', e) + break return WrappedSocket(cnx, sock) diff --git a/urllib3/exceptions.py b/urllib3/exceptions.py index 8dd76af..98ef9ab 100644 --- a/urllib3/exceptions.py +++ b/urllib3/exceptions.py @@ -20,7 +20,18 @@ class PoolError(HTTPError): def __reduce__(self): # For pickling purposes. - return self.__class__, (None, self.url) + return self.__class__, (None, None) + + +class RequestError(PoolError): + "Base exception for PoolErrors that have associated URLs." + def __init__(self, pool, url, message): + self.url = url + PoolError.__init__(self, pool, message) + + def __reduce__(self): + # For pickling purposes. + return self.__class__, (None, self.url, None) class SSLError(HTTPError): @@ -28,6 +39,11 @@ class SSLError(HTTPError): pass +class ProxyError(HTTPError): + "Raised when the connection to a proxy fails." + pass + + class DecodeError(HTTPError): "Raised when automatic decoding based on Content-Type fails." pass @@ -35,7 +51,7 @@ class DecodeError(HTTPError): ## Leaf Exceptions -class MaxRetryError(PoolError): +class MaxRetryError(RequestError): "Raised when the maximum number of retries is exceeded." def __init__(self, pool, url, reason=None): @@ -47,23 +63,41 @@ class MaxRetryError(PoolError): else: message += " (Caused by redirect)" - PoolError.__init__(self, pool, message) - self.url = url + RequestError.__init__(self, pool, url, message) -class HostChangedError(PoolError): +class HostChangedError(RequestError): "Raised when an existing pool gets a request for a foreign host." def __init__(self, pool, url, retries=3): message = "Tried to open a foreign host with url: %s" % url - PoolError.__init__(self, pool, message) - - self.url = url + RequestError.__init__(self, pool, url, message) self.retries = retries -class TimeoutError(PoolError): - "Raised when a socket timeout occurs." +class TimeoutStateError(HTTPError): + """ Raised when passing an invalid state to a timeout """ + pass + + +class TimeoutError(HTTPError): + """ Raised when a socket timeout error occurs. + + Catching this error will catch both :exc:`ReadTimeoutErrors + ` and :exc:`ConnectTimeoutErrors `. + """ + pass + + +class ReadTimeoutError(TimeoutError, RequestError): + "Raised when a socket timeout occurs while receiving data from a server" + pass + + +# This timeout error does not have a URL attached and needs to inherit from the +# base HTTPError +class ConnectTimeoutError(TimeoutError): + "Raised when a socket timeout occurs while connecting to a server" pass diff --git a/urllib3/fields.py b/urllib3/fields.py new file mode 100644 index 0000000..ed01765 --- /dev/null +++ b/urllib3/fields.py @@ -0,0 +1,177 @@ +# urllib3/fields.py +# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# +# This module is part of urllib3 and is released under +# the MIT License: http://www.opensource.org/licenses/mit-license.php + +import email.utils +import mimetypes + +from .packages import six + + +def guess_content_type(filename, default='application/octet-stream'): + """ + Guess the "Content-Type" of a file. + + :param filename: + The filename to guess the "Content-Type" of using :mod:`mimetimes`. + :param default: + If no "Content-Type" can be guessed, default to `default`. + """ + if filename: + return mimetypes.guess_type(filename)[0] or default + return default + + +def format_header_param(name, value): + """ + Helper function to format and quote a single header parameter. + + Particularly useful for header parameters which might contain + non-ASCII values, like file names. This follows RFC 2231, as + suggested by RFC 2388 Section 4.4. + + :param name: + The name of the parameter, a string expected to be ASCII only. + :param value: + The value of the parameter, provided as a unicode string. + """ + if not any(ch in value for ch in '"\\\r\n'): + result = '%s="%s"' % (name, value) + try: + result.encode('ascii') + except UnicodeEncodeError: + pass + else: + return result + if not six.PY3: # Python 2: + value = value.encode('utf-8') + value = email.utils.encode_rfc2231(value, 'utf-8') + value = '%s*=%s' % (name, value) + return value + + +class RequestField(object): + """ + A data container for request body parameters. + + :param name: + The name of this request field. + :param data: + The data/value body. + :param filename: + An optional filename of the request field. + :param headers: + An optional dict-like object of headers to initially use for the field. + """ + def __init__(self, name, data, filename=None, headers=None): + self._name = name + self._filename = filename + self.data = data + self.headers = {} + if headers: + self.headers = dict(headers) + + @classmethod + def from_tuples(cls, fieldname, value): + """ + A :class:`~urllib3.fields.RequestField` factory from old-style tuple parameters. + + Supports constructing :class:`~urllib3.fields.RequestField` from parameter + of key/value strings AND key/filetuple. A filetuple is a (filename, data, MIME type) + tuple where the MIME type is optional. For example: :: + + 'foo': 'bar', + 'fakefile': ('foofile.txt', 'contents of foofile'), + 'realfile': ('barfile.txt', open('realfile').read()), + 'typedfile': ('bazfile.bin', open('bazfile').read(), 'image/jpeg'), + 'nonamefile': 'contents of nonamefile field', + + Field names and filenames must be unicode. + """ + if isinstance(value, tuple): + if len(value) == 3: + filename, data, content_type = value + else: + filename, data = value + content_type = guess_content_type(filename) + else: + filename = None + content_type = None + data = value + + request_param = cls(fieldname, data, filename=filename) + request_param.make_multipart(content_type=content_type) + + return request_param + + def _render_part(self, name, value): + """ + Overridable helper function to format a single header parameter. + + :param name: + The name of the parameter, a string expected to be ASCII only. + :param value: + The value of the parameter, provided as a unicode string. + """ + return format_header_param(name, value) + + def _render_parts(self, header_parts): + """ + Helper function to format and quote a single header. + + Useful for single headers that are composed of multiple items. E.g., + 'Content-Disposition' fields. + + :param header_parts: + A sequence of (k, v) typles or a :class:`dict` of (k, v) to format as + `k1="v1"; k2="v2"; ...`. + """ + parts = [] + iterable = header_parts + if isinstance(header_parts, dict): + iterable = header_parts.items() + + for name, value in iterable: + if value: + parts.append(self._render_part(name, value)) + + return '; '.join(parts) + + def render_headers(self): + """ + Renders the headers for this request field. + """ + lines = [] + + sort_keys = ['Content-Disposition', 'Content-Type', 'Content-Location'] + for sort_key in sort_keys: + if self.headers.get(sort_key, False): + lines.append('%s: %s' % (sort_key, self.headers[sort_key])) + + for header_name, header_value in self.headers.items(): + if header_name not in sort_keys: + if header_value: + lines.append('%s: %s' % (header_name, header_value)) + + lines.append('\r\n') + return '\r\n'.join(lines) + + def make_multipart(self, content_disposition=None, content_type=None, content_location=None): + """ + Makes this request field into a multipart request field. + + This method overrides "Content-Disposition", "Content-Type" and + "Content-Location" headers to the request parameter. + + :param content_type: + The 'Content-Type' of the request body. + :param content_location: + The 'Content-Location' of the request body. + + """ + self.headers['Content-Disposition'] = content_disposition or 'form-data' + self.headers['Content-Disposition'] += '; '.join(['', self._render_parts((('name', self._name), ('filename', self._filename)))]) + self.headers['Content-Type'] = content_type + self.headers['Content-Location'] = content_location diff --git a/urllib3/filepost.py b/urllib3/filepost.py index 526a740..4575582 100644 --- a/urllib3/filepost.py +++ b/urllib3/filepost.py @@ -12,6 +12,7 @@ from io import BytesIO from .packages import six from .packages.six import b +from .fields import RequestField writer = codecs.lookup('utf-8')[3] @@ -23,15 +24,38 @@ def choose_boundary(): return uuid4().hex -def get_content_type(filename): - return mimetypes.guess_type(filename)[0] or 'application/octet-stream' +def iter_field_objects(fields): + """ + Iterate over fields. + + Supports list of (k, v) tuples and dicts, and lists of + :class:`~urllib3.fields.RequestField`. + + """ + if isinstance(fields, dict): + i = six.iteritems(fields) + else: + i = iter(fields) + + for field in i: + if isinstance(field, RequestField): + yield field + else: + yield RequestField.from_tuples(*field) def iter_fields(fields): """ Iterate over fields. + .. deprecated :: + + The addition of `~urllib3.fields.RequestField` makes this function + obsolete. Instead, use :func:`iter_field_objects`, which returns + `~urllib3.fields.RequestField` objects, instead. + Supports list of (k, v) tuples and dicts. + """ if isinstance(fields, dict): return ((k, v) for k, v in six.iteritems(fields)) @@ -44,15 +68,7 @@ def encode_multipart_formdata(fields, boundary=None): Encode a dictionary of ``fields`` using the multipart/form-data MIME format. :param fields: - Dictionary of fields or list of (key, value) or (key, value, MIME type) - field tuples. The key is treated as the field name, and the value as - the body of the form-data bytes. If the value is a tuple of two - elements, then the first element is treated as the filename of the - form-data section and a suitable MIME type is guessed based on the - filename. If the value is a tuple of three elements, then the third - element is treated as an explicit MIME type of the form-data section. - - Field names and filenames must be unicode. + Dictionary of fields or list of (key, :class:`~urllib3.fields.RequestField`). :param boundary: If not specified, then a random boundary will be generated using @@ -62,24 +78,11 @@ def encode_multipart_formdata(fields, boundary=None): if boundary is None: boundary = choose_boundary() - for fieldname, value in iter_fields(fields): + for field in iter_field_objects(fields): body.write(b('--%s\r\n' % (boundary))) - if isinstance(value, tuple): - if len(value) == 3: - filename, data, content_type = value - else: - filename, data = value - content_type = get_content_type(filename) - writer(body).write('Content-Disposition: form-data; name="%s"; ' - 'filename="%s"\r\n' % (fieldname, filename)) - body.write(b('Content-Type: %s\r\n\r\n' % - (content_type,))) - else: - data = value - writer(body).write('Content-Disposition: form-data; name="%s"\r\n' - % (fieldname)) - body.write(b'\r\n') + writer(body).write(field.render_headers()) + data = field.data if isinstance(data, int): data = str(data) # Backwards compatibility diff --git a/urllib3/packages/ssl_match_hostname/__init__.py b/urllib3/packages/ssl_match_hostname/__init__.py index 9560b04..2d61ac2 100644 --- a/urllib3/packages/ssl_match_hostname/__init__.py +++ b/urllib3/packages/ssl_match_hostname/__init__.py @@ -7,23 +7,60 @@ __version__ = '3.2.2' class CertificateError(ValueError): pass -def _dnsname_to_pat(dn): +def _dnsname_match(dn, hostname, max_wildcards=1): + """Matching according to RFC 6125, section 6.4.3 + + http://tools.ietf.org/html/rfc6125#section-6.4.3 + """ pats = [] - for frag in dn.split(r'.'): - if frag == '*': - # When '*' is a fragment by itself, it matches a non-empty dotless - # fragment. - pats.append('[^.]+') - else: - # Otherwise, '*' matches any dotless fragment. - frag = re.escape(frag) - pats.append(frag.replace(r'\*', '[^.]*')) - return re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE) + if not dn: + return False + + parts = dn.split(r'.') + leftmost = parts[0] + + wildcards = leftmost.count('*') + if wildcards > max_wildcards: + # Issue #17980: avoid denials of service by refusing more + # than one wildcard per fragment. A survery of established + # policy among SSL implementations showed it to be a + # reasonable choice. + raise CertificateError( + "too many wildcards in certificate DNS name: " + repr(dn)) + + # speed up common case w/o wildcards + if not wildcards: + return dn.lower() == hostname.lower() + + # RFC 6125, section 6.4.3, subitem 1. + # The client SHOULD NOT attempt to match a presented identifier in which + # the wildcard character comprises a label other than the left-most label. + if leftmost == '*': + # When '*' is a fragment by itself, it matches a non-empty dotless + # fragment. + pats.append('[^.]+') + elif leftmost.startswith('xn--') or hostname.startswith('xn--'): + # RFC 6125, section 6.4.3, subitem 3. + # The client SHOULD NOT attempt to match a presented identifier + # where the wildcard character is embedded within an A-label or + # U-label of an internationalized domain name. + pats.append(re.escape(leftmost)) + else: + # Otherwise, '*' matches any dotless string, e.g. www* + pats.append(re.escape(leftmost).replace(r'\*', '[^.]*')) + + # add the remaining fragments, ignore any wildcards + for frag in parts[1:]: + pats.append(re.escape(frag)) + + pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE) + return pat.match(hostname) + def match_hostname(cert, hostname): """Verify that *cert* (in decoded format as returned by - SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 rules - are mostly followed, but IP addresses are not accepted for *hostname*. + SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 and RFC 6125 + rules are followed, but IP addresses are not accepted for *hostname*. CertificateError is raised on failure. On success, the function returns nothing. @@ -34,7 +71,7 @@ def match_hostname(cert, hostname): san = cert.get('subjectAltName', ()) for key, value in san: if key == 'DNS': - if _dnsname_to_pat(value).match(hostname): + if _dnsname_match(value, hostname): return dnsnames.append(value) if not dnsnames: @@ -45,7 +82,7 @@ def match_hostname(cert, hostname): # XXX according to RFC 2818, the most specific Common Name # must be used. if key == 'commonName': - if _dnsname_to_pat(value).match(hostname): + if _dnsname_match(value, hostname): return dnsnames.append(value) if len(dnsnames) > 1: diff --git a/urllib3/poolmanager.py b/urllib3/poolmanager.py index ce0c248..e7f8667 100644 --- a/urllib3/poolmanager.py +++ b/urllib3/poolmanager.py @@ -6,9 +6,14 @@ import logging +try: # Python 3 + from urllib.parse import urljoin +except ImportError: + from urlparse import urljoin + from ._collections import RecentlyUsedContainer from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool -from .connectionpool import connection_from_url, port_by_scheme +from .connectionpool import port_by_scheme from .request import RequestMethods from .util import parse_url @@ -55,6 +60,8 @@ class PoolManager(RequestMethods): """ + proxy = None + def __init__(self, num_pools=10, headers=None, **connection_pool_kw): RequestMethods.__init__(self, headers) self.connection_pool_kw = connection_pool_kw @@ -94,20 +101,23 @@ class PoolManager(RequestMethods): If ``port`` isn't given, it will be derived from the ``scheme`` using ``urllib3.connectionpool.port_by_scheme``. """ + scheme = scheme or 'http' + port = port or port_by_scheme.get(scheme, 80) pool_key = (scheme, host, port) - # If the scheme, host, or port doesn't match existing open connections, - # open a new ConnectionPool. - pool = self.pools.get(pool_key) - if pool: - return pool + with self.pools.lock: + # If the scheme, host, or port doesn't match existing open + # connections, open a new ConnectionPool. + pool = self.pools.get(pool_key) + if pool: + return pool - # Make a fresh ConnectionPool of the desired type - pool = self._new_pool(scheme, host, port) - self.pools[pool_key] = pool + # Make a fresh ConnectionPool of the desired type + pool = self._new_pool(scheme, host, port) + self.pools[pool_key] = pool return pool def connection_from_url(self, url): @@ -139,12 +149,19 @@ class PoolManager(RequestMethods): if 'headers' not in kw: kw['headers'] = self.headers - response = conn.urlopen(method, u.request_uri, **kw) + if self.proxy is not None and u.scheme == "http": + response = conn.urlopen(method, url, **kw) + else: + response = conn.urlopen(method, u.request_uri, **kw) redirect_location = redirect and response.get_redirect_location() if not redirect_location: return response + # Support relative URLs for redirecting. + redirect_location = urljoin(url, redirect_location) + + # RFC 2616, Section 10.3.4 if response.status == 303: method = 'GET' @@ -154,15 +171,59 @@ class PoolManager(RequestMethods): return self.urlopen(method, redirect_location, **kw) -class ProxyManager(RequestMethods): +class ProxyManager(PoolManager): """ - Given a ConnectionPool to a proxy, the ProxyManager's ``urlopen`` method - will make requests to any url through the defined proxy. The ProxyManager - class will automatically set the 'Host' header if it is not provided. + Behaves just like :class:`PoolManager`, but sends all requests through + the defined proxy, using the CONNECT method for HTTPS URLs. + + :param poxy_url: + The URL of the proxy to be used. + + :param proxy_headers: + A dictionary contaning headers that will be sent to the proxy. In case + of HTTP they are being sent with each request, while in the + HTTPS/CONNECT case they are sent only once. Could be used for proxy + authentication. + + Example: + >>> proxy = urllib3.ProxyManager('http://localhost:3128/') + >>> r1 = proxy.request('GET', 'http://google.com/') + >>> r2 = proxy.request('GET', 'http://httpbin.org/') + >>> len(proxy.pools) + 1 + >>> r3 = proxy.request('GET', 'https://httpbin.org/') + >>> r4 = proxy.request('GET', 'https://twitter.com/') + >>> len(proxy.pools) + 3 + """ - def __init__(self, proxy_pool): - self.proxy_pool = proxy_pool + def __init__(self, proxy_url, num_pools=10, headers=None, + proxy_headers=None, **connection_pool_kw): + + if isinstance(proxy_url, HTTPConnectionPool): + proxy_url = '%s://%s:%i' % (proxy_url.scheme, proxy_url.host, + proxy_url.port) + proxy = parse_url(proxy_url) + if not proxy.port: + port = port_by_scheme.get(proxy.scheme, 80) + proxy = proxy._replace(port=port) + self.proxy = proxy + self.proxy_headers = proxy_headers or {} + assert self.proxy.scheme in ("http", "https"), \ + 'Not supported proxy scheme %s' % self.proxy.scheme + connection_pool_kw['_proxy'] = self.proxy + connection_pool_kw['_proxy_headers'] = self.proxy_headers + super(ProxyManager, self).__init__( + num_pools, headers, **connection_pool_kw) + + def connection_from_host(self, host, port=None, scheme='http'): + if scheme == "https": + return super(ProxyManager, self).connection_from_host( + host, port, scheme) + + return super(ProxyManager, self).connection_from_host( + self.proxy.host, self.proxy.port, self.proxy.scheme) def _set_proxy_headers(self, url, headers=None): """ @@ -171,22 +232,28 @@ class ProxyManager(RequestMethods): """ headers_ = {'Accept': '*/*'} - host = parse_url(url).host - if host: - headers_['Host'] = host + netloc = parse_url(url).netloc + if netloc: + headers_['Host'] = netloc if headers: headers_.update(headers) - return headers_ - def urlopen(self, method, url, **kw): + def urlopen(self, method, url, redirect=True, **kw): "Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute." - kw['assert_same_host'] = False - kw['headers'] = self._set_proxy_headers(url, headers=kw.get('headers')) - return self.proxy_pool.urlopen(method, url, **kw) + u = parse_url(url) + + if u.scheme == "http": + # It's too late to set proxy headers on per-request basis for + # tunnelled HTTPS connections, should use + # constructor's proxy_headers instead. + kw['headers'] = self._set_proxy_headers(url, kw.get('headers', + self.headers)) + kw['headers'].update(self.proxy_headers) + + return super(ProxyManager, self).urlopen(method, url, redirect, **kw) -def proxy_from_url(url, **pool_kw): - proxy_pool = connection_from_url(url, **pool_kw) - return ProxyManager(proxy_pool) +def proxy_from_url(url, **kw): + return ProxyManager(proxy_url=url, **kw) diff --git a/urllib3/request.py b/urllib3/request.py index bf0256e..66a9a0e 100644 --- a/urllib3/request.py +++ b/urllib3/request.py @@ -30,7 +30,7 @@ class RequestMethods(object): in the URL (such as GET, HEAD, DELETE). :meth:`.request_encode_body` is for sending requests whose fields are - encoded in the *body* of the request using multipart or www-orm-urlencoded + encoded in the *body* of the request using multipart or www-form-urlencoded (such as for POST, PUT, PATCH). :meth:`.request` is for making any kind of request, it will look up the diff --git a/urllib3/response.py b/urllib3/response.py index 1685760..4efff5a 100644 --- a/urllib3/response.py +++ b/urllib3/response.py @@ -7,9 +7,11 @@ import logging import zlib +import io from .exceptions import DecodeError from .packages.six import string_types as basestring, binary_type +from .util import is_fp_closed log = logging.getLogger(__name__) @@ -48,7 +50,7 @@ def _get_decoder(mode): return DeflateDecoder() -class HTTPResponse(object): +class HTTPResponse(io.IOBase): """ HTTP Response container. @@ -72,6 +74,7 @@ class HTTPResponse(object): """ CONTENT_DECODERS = ['gzip', 'deflate'] + REDIRECT_STATUSES = [301, 302, 303, 307, 308] def __init__(self, body='', headers=None, status=0, version=0, reason=None, strict=0, preload_content=True, decode_content=True, @@ -105,7 +108,7 @@ class HTTPResponse(object): code and valid location. ``None`` if redirect status and no location. ``False`` if not a redirect status code. """ - if self.status in [301, 302, 303, 307]: + if self.status in self.REDIRECT_STATUSES: return self.headers.get('location') return False @@ -183,11 +186,13 @@ class HTTPResponse(object): try: if decode_content and self._decoder: data = self._decoder.decompress(data) - except (IOError, zlib.error): - raise DecodeError("Received response with content-encoding: %s, but " - "failed to decode it." % content_encoding) + except (IOError, zlib.error) as e: + raise DecodeError( + "Received response with content-encoding: %s, but " + "failed to decode it." % content_encoding, + e) - if flush_decoder and self._decoder: + if flush_decoder and decode_content and self._decoder: buf = self._decoder.decompress(binary_type()) data += buf + self._decoder.flush() @@ -200,6 +205,29 @@ class HTTPResponse(object): if self._original_response and self._original_response.isclosed(): self.release_conn() + def stream(self, amt=2**16, decode_content=None): + """ + A generator wrapper for the read() method. A call will block until + ``amt`` bytes have been read from the connection or until the + connection is closed. + + :param amt: + How much of the content to read. The generator will return up to + much data per iteration, but may return less. This is particularly + likely when using compressed data. However, the empty string will + never be returned. + + :param decode_content: + If True, will attempt to decode the body based on the + 'content-encoding' header. + """ + while not is_fp_closed(self._fp): + data = self.read(amt=amt, decode_content=decode_content) + + if data: + yield data + + @classmethod def from_httplib(ResponseCls, r, **response_kw): """ @@ -239,3 +267,35 @@ class HTTPResponse(object): def getheader(self, name, default=None): return self.headers.get(name, default) + + # Overrides from io.IOBase + def close(self): + if not self.closed: + self._fp.close() + + @property + def closed(self): + if self._fp is None: + return True + elif hasattr(self._fp, 'closed'): + return self._fp.closed + elif hasattr(self._fp, 'isclosed'): # Python 2 + return self._fp.isclosed() + else: + return True + + def fileno(self): + if self._fp is None: + raise IOError("HTTPResponse has no file to get a fileno from") + elif hasattr(self._fp, "fileno"): + return self._fp.fileno() + else: + raise IOError("The file-like object this HTTPResponse is wrapped " + "around has no file descriptor") + + def flush(self): + if self._fp is not None and hasattr(self._fp, 'flush'): + return self._fp.flush() + + def readable(self): + return True diff --git a/urllib3/util.py b/urllib3/util.py index 544f9ed..266c9ed 100644 --- a/urllib3/util.py +++ b/urllib3/util.py @@ -6,10 +6,11 @@ from base64 import b64encode +from binascii import hexlify, unhexlify from collections import namedtuple -from socket import error as SocketError from hashlib import md5, sha1 -from binascii import hexlify, unhexlify +from socket import error as SocketError, _GLOBAL_DEFAULT_TIMEOUT +import time try: from select import poll, POLLIN @@ -31,9 +32,234 @@ try: # Test for SSL features except ImportError: pass - from .packages import six -from .exceptions import LocationParseError, SSLError +from .exceptions import LocationParseError, SSLError, TimeoutStateError + + +_Default = object() +# The default timeout to use for socket connections. This is the attribute used +# by httplib to define the default timeout + + +def current_time(): + """ + Retrieve the current time, this function is mocked out in unit testing. + """ + return time.time() + + +class Timeout(object): + """ + Utility object for storing timeout values. + + Example usage: + + .. code-block:: python + + timeout = urllib3.util.Timeout(connect=2.0, read=7.0) + pool = HTTPConnectionPool('www.google.com', 80, timeout=timeout) + pool.request(...) # Etc, etc + + :param connect: + The maximum amount of time to wait for a connection attempt to a server + to succeed. Omitting the parameter will default the connect timeout to + the system default, probably `the global default timeout in socket.py + `_. + None will set an infinite timeout for connection attempts. + + :type connect: integer, float, or None + + :param read: + The maximum amount of time to wait between consecutive + read operations for a response from the server. Omitting + the parameter will default the read timeout to the system + default, probably `the global default timeout in socket.py + `_. + None will set an infinite timeout. + + :type read: integer, float, or None + + :param total: + The maximum amount of time to wait for an HTTP request to connect and + return. This combines the connect and read timeouts into one. In the + event that both a connect timeout and a total are specified, or a read + timeout and a total are specified, the shorter timeout will be applied. + + Defaults to None. + + + :type total: integer, float, or None + + .. note:: + + Many factors can affect the total amount of time for urllib3 to return + an HTTP response. Specifically, Python's DNS resolver does not obey the + timeout specified on the socket. Other factors that can affect total + request time include high CPU load, high swap, the program running at a + low priority level, or other behaviors. The observed running time for + urllib3 to return a response may be greater than the value passed to + `total`. + + In addition, the read and total timeouts only measure the time between + read operations on the socket connecting the client and the server, not + the total amount of time for the request to return a complete response. + As an example, you may want a request to return within 7 seconds or + fail, so you set the ``total`` timeout to 7 seconds. If the server + sends one byte to you every 5 seconds, the request will **not** trigger + time out. This case is admittedly rare. + """ + + #: A sentinel object representing the default timeout value + DEFAULT_TIMEOUT = _GLOBAL_DEFAULT_TIMEOUT + + def __init__(self, connect=_Default, read=_Default, total=None): + self._connect = self._validate_timeout(connect, 'connect') + self._read = self._validate_timeout(read, 'read') + self.total = self._validate_timeout(total, 'total') + self._start_connect = None + + def __str__(self): + return '%s(connect=%r, read=%r, total=%r)' % ( + type(self).__name__, self._connect, self._read, self.total) + + + @classmethod + def _validate_timeout(cls, value, name): + """ Check that a timeout attribute is valid + + :param value: The timeout value to validate + :param name: The name of the timeout attribute to validate. This is used + for clear error messages + :return: the value + :raises ValueError: if the type is not an integer or a float, or if it + is a numeric value less than zero + """ + if value is _Default: + return cls.DEFAULT_TIMEOUT + + if value is None or value is cls.DEFAULT_TIMEOUT: + return value + + try: + float(value) + except (TypeError, ValueError): + raise ValueError("Timeout value %s was %s, but it must be an " + "int or float." % (name, value)) + + try: + if value < 0: + raise ValueError("Attempted to set %s timeout to %s, but the " + "timeout cannot be set to a value less " + "than 0." % (name, value)) + except TypeError: # Python 3 + raise ValueError("Timeout value %s was %s, but it must be an " + "int or float." % (name, value)) + + return value + + @classmethod + def from_float(cls, timeout): + """ Create a new Timeout from a legacy timeout value. + + The timeout value used by httplib.py sets the same timeout on the + connect(), and recv() socket requests. This creates a :class:`Timeout` + object that sets the individual timeouts to the ``timeout`` value passed + to this function. + + :param timeout: The legacy timeout value + :type timeout: integer, float, sentinel default object, or None + :return: a Timeout object + :rtype: :class:`Timeout` + """ + return Timeout(read=timeout, connect=timeout) + + def clone(self): + """ Create a copy of the timeout object + + Timeout properties are stored per-pool but each request needs a fresh + Timeout object to ensure each one has its own start/stop configured. + + :return: a copy of the timeout object + :rtype: :class:`Timeout` + """ + # We can't use copy.deepcopy because that will also create a new object + # for _GLOBAL_DEFAULT_TIMEOUT, which socket.py uses as a sentinel to + # detect the user default. + return Timeout(connect=self._connect, read=self._read, + total=self.total) + + def start_connect(self): + """ Start the timeout clock, used during a connect() attempt + + :raises urllib3.exceptions.TimeoutStateError: if you attempt + to start a timer that has been started already. + """ + if self._start_connect is not None: + raise TimeoutStateError("Timeout timer has already been started.") + self._start_connect = current_time() + return self._start_connect + + def get_connect_duration(self): + """ Gets the time elapsed since the call to :meth:`start_connect`. + + :return: the elapsed time + :rtype: float + :raises urllib3.exceptions.TimeoutStateError: if you attempt + to get duration for a timer that hasn't been started. + """ + if self._start_connect is None: + raise TimeoutStateError("Can't get connect duration for timer " + "that has not started.") + return current_time() - self._start_connect + + @property + def connect_timeout(self): + """ Get the value to use when setting a connection timeout. + + This will be a positive float or integer, the value None + (never timeout), or the default system timeout. + + :return: the connect timeout + :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None + """ + if self.total is None: + return self._connect + + if self._connect is None or self._connect is self.DEFAULT_TIMEOUT: + return self.total + + return min(self._connect, self.total) + + @property + def read_timeout(self): + """ Get the value for the read timeout. + + This assumes some time has elapsed in the connection timeout and + computes the read timeout appropriately. + + If self.total is set, the read timeout is dependent on the amount of + time taken by the connect timeout. If the connection time has not been + established, a :exc:`~urllib3.exceptions.TimeoutStateError` will be + raised. + + :return: the value to use for the read timeout + :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None + :raises urllib3.exceptions.TimeoutStateError: If :meth:`start_connect` + has not yet been called on this object. + """ + if (self.total is not None and + self.total is not self.DEFAULT_TIMEOUT and + self._read is not None and + self._read is not self.DEFAULT_TIMEOUT): + # in case the connect timeout has not yet been established. + if self._start_connect is None: + return self._read + return max(0, min(self.total - self.get_connect_duration(), + self._read)) + elif self.total is not None and self.total is not self.DEFAULT_TIMEOUT: + return max(0, self.total - self.get_connect_duration()) + else: + return self._read class Url(namedtuple('Url', ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment'])): @@ -61,6 +287,13 @@ class Url(namedtuple('Url', ['scheme', 'auth', 'host', 'port', 'path', 'query', return uri + @property + def netloc(self): + """Network location including host and port""" + if self.port: + return '%s:%d' % (self.host, self.port) + return self.host + def split_first(s, delims): """ @@ -114,7 +347,7 @@ def parse_url(url): # While this code has overlap with stdlib's urlparse, it is much # simplified for our needs and less annoying. - # Additionally, this imeplementations does silly things to be optimal + # Additionally, this implementations does silly things to be optimal # on CPython. scheme = None @@ -143,7 +376,8 @@ def parse_url(url): # IPv6 if url and url[0] == '[': - host, url = url[1:].split(']', 1) + host, url = url.split(']', 1) + host += ']' # Port if ':' in url: @@ -341,6 +575,20 @@ def assert_fingerprint(cert, fingerprint): .format(hexlify(fingerprint_bytes), hexlify(cert_digest))) +def is_fp_closed(obj): + """ + Checks whether a given file-like object is closed. + + :param obj: + The file-like object to check. + """ + if hasattr(obj, 'fp'): + # Object is a container for another file-like object that gets released + # on exhaustion (e.g. HTTPResponse) + return obj.fp is None + + return obj.closed + if SSLContext is not None: # Python 3.2+ def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, -- cgit v1.2.3