author    SVN-Git Migration <python-modules-team@lists.alioth.debian.org> 2015-10-08 13:19:35 -0700
committer SVN-Git Migration <python-modules-team@lists.alioth.debian.org> 2015-10-08 13:19:35 -0700
commit    52980ebd0a4eb75acf055a2256e095772c1fa7c6 (patch)
tree      f339a7e66934caa7948c15e8c5bd3ea04ee72e5a
parent    92b84b67f7b187b81dacbf1ae46d59a1d0b5b125 (diff)
Imported Upstream version 1.7.1
-rw-r--r--  CHANGES.rst                                       48
-rw-r--r--  CONTRIBUTORS.txt                                  30
-rw-r--r--  PKG-INFO                                          70
-rw-r--r--  README.rst                                        18
-rw-r--r--  dummyserver/handlers.py                           57
-rwxr-xr-x  dummyserver/proxy.py                             137
-rwxr-xr-x  dummyserver/server.py                             50
-rw-r--r--  dummyserver/testcase.py                           86
-rw-r--r--  setup.cfg                                          1
-rw-r--r--  test-requirements.txt                              5
-rw-r--r--  test/__init__.py                                   0
-rw-r--r--  test/benchmark.py                                 77
-rw-r--r--  test/test_connectionpool.py                       29
-rw-r--r--  test/test_exceptions.py                           30
-rw-r--r--  test/test_fields.py                               44
-rw-r--r--  test/test_filepost.py                             14
-rw-r--r--  test/test_proxymanager.py                         18
-rw-r--r--  test/test_response.py                            135
-rw-r--r--  test/test_util.py                                155
-rw-r--r--  urllib3.egg-info/PKG-INFO                         70
-rw-r--r--  urllib3.egg-info/SOURCES.txt                       5
-rw-r--r--  urllib3/__init__.py                                4
-rw-r--r--  urllib3/_collections.py                           16
-rw-r--r--  urllib3/connectionpool.py                        286
-rw-r--r--  urllib3/contrib/ntlmpool.py                        2
-rw-r--r--  urllib3/contrib/pyopenssl.py                     193
-rw-r--r--  urllib3/exceptions.py                             54
-rw-r--r--  urllib3/fields.py                                177
-rw-r--r--  urllib3/filepost.py                               57
-rw-r--r--  urllib3/packages/ssl_match_hostname/__init__.py   67
-rw-r--r--  urllib3/poolmanager.py                           121
-rw-r--r--  urllib3/request.py                                 2
-rw-r--r--  urllib3/response.py                               72
-rw-r--r--  urllib3/util.py                                  260
34 files changed, 2112 insertions(+), 278 deletions(-)
diff --git a/CHANGES.rst b/CHANGES.rst
index a2a0da8..891fd79 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -1,6 +1,54 @@
Changes
=======
+1.7.1 (2013-09-25)
+++++++++++++++++++
+
+* Added granular timeout support with new `urllib3.util.Timeout` class.
+ (Issue #231)
+
+* Fixed Python 3.4 support. (Issue #238)
+
+
+1.7 (2013-08-14)
+++++++++++++++++
+
+* More exceptions are now pickle-able, with tests. (Issue #174)
+
+* Fixed redirecting with relative URLs in Location header. (Issue #178)
+
+* Support for relative urls in ``Location: ...`` header. (Issue #179)
+
+* ``urllib3.response.HTTPResponse`` now inherits from ``io.IOBase`` for bonus
+ file-like functionality. (Issue #187)
+
+* Passing ``assert_hostname=False`` when creating a HTTPSConnectionPool will
+ skip hostname verification for SSL connections. (Issue #194)
+
+* New method ``urllib3.response.HTTPResponse.stream(...)`` which acts as a
+ generator wrapped around ``.read(...)``. (Issue #198)
+
+* IPv6 url parsing enforces brackets around the hostname. (Issue #199)
+
+* Fixed thread race condition in
+ ``urllib3.poolmanager.PoolManager.connection_from_host(...)`` (Issue #204)
+
+* ``ProxyManager`` requests now include non-default port in ``Host: ...``
+ header. (Issue #217)
+
+* Added HTTPS proxy support in ``ProxyManager``. (Issue #170 #139)
+
+* New ``RequestField`` object can be passed to the ``fields=...`` param which
+ can specify headers. (Issue #220)
+
+* Raise ``urllib3.exceptions.ProxyError`` when connecting to proxy fails.
+ (Issue #221)
+
+* Use international headers when posting file names. (Issue #119)
+
+* Improved IPv6 support. (Issue #203)
+
+
1.6 (2013-04-25)
++++++++++++++++
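
As a quick orientation for the 1.7.1 changelog entry above, here is a minimal
sketch of the new ``urllib3.util.Timeout`` class in use; the host and the
timeout values are illustrative only, not taken from this commit: ::

    import urllib3
    from urllib3.util import Timeout

    # Allow up to 2s to establish the TCP connection and up to 7s of
    # socket inactivity while reading; exceeding either raises
    # ConnectTimeoutError or ReadTimeoutError respectively.
    timeout = Timeout(connect=2.0, read=7.0)
    pool = urllib3.HTTPConnectionPool('example.com', timeout=timeout)
    response = pool.request('GET', '/')

    # total=... caps connect plus read time across the whole request.
    pool = urllib3.HTTPConnectionPool('example.com',
                                      timeout=Timeout(total=10.0))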
diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt
index 19f59ce..e1aca42 100644
--- a/CONTRIBUTORS.txt
+++ b/CONTRIBUTORS.txt
@@ -60,5 +60,35 @@ In chronological order:
* Marc Schlaich <marc.schlaich@gmail.com>
* Various bugfixes and test improvements.
+* Bryce Boe <bbzbryce@gmail.com>
+ * Correct six.moves conflict
+ * Fixed pickle support of some exceptions
+
+* Boris Figovsky <boris.figovsky@ravellosystems.com>
+ * Allowed to skip SSL hostname verification
+
+* Cory Benfield <http://lukasa.co.uk/about/>
+ * Stream method for Response objects.
+ * Return native strings in header values.
+ * Generate 'Host' header when using proxies.
+
+* Jason Robinson <jaywink@basshero.org>
+ * Add missing WrappedSocket.fileno method in PyOpenSSL
+
+* Audrius Butkevicius <audrius.butkevicius@elastichosts.com>
+ * Fixed a race condition
+
+* Stanislav Vitkovskiy <stas.vitkovsky@gmail.com>
+ * Added HTTPS (CONNECT) proxy support
+
+* Stephen Holsapple <sholsapp@gmail.com>
+ * Added abstraction for granular control of request fields
+
+* Martin von Gagern <Martin.vGagern@gmx.net>
+ * Support for non-ASCII header parameters
+
+* Kevin Burke <kev@inburke.com> and Pavel Kirichenko <juanych@yandex-team.ru>
+ * Support for separate connect and request timeouts
+
* [Your name or handle] <[email or website]>
* [Brief summary of your changes]
diff --git a/PKG-INFO b/PKG-INFO
index 661e33b..a81ab9c 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
-Metadata-Version: 1.1
+Metadata-Version: 1.0
Name: urllib3
-Version: 1.6
+Version: 1.7.1
Summary: HTTP library with thread-safe connection pooling, file post, and more.
Home-page: http://urllib3.readthedocs.org/
Author: Andrey Petrov
@@ -25,7 +25,7 @@ Description: =======
- Supports gzip and deflate decoding.
- Thread-safe and sanity-safe.
- Works with AppEngine, gevent, and eventlib.
- - Tested on Python 2.6+ and Python 3.3+, 100% unit test coverage.
+ - Tested on Python 2.6+ and Python 3.2+, 100% unit test coverage.
- Small and easy to understand codebase perfect for extending and building upon.
For a more comprehensive solution, have a look at
`Requests <http://python-requests.org/>`_ which is also powered by urllib3.
@@ -89,14 +89,18 @@ Description: =======
Run the tests
=============
- We use some external dependencies to run the urllib3 test suite. Easiest way to
- run the tests is thusly from the urllib3 source root: ::
+ We use some external dependencies, multiple interpreters and code coverage
+ analysis while running test suite. Easiest way to run the tests is thusly the
+ ``tox`` utility: ::
- $ pip install -r test-requirements.txt
- $ nosetests
- .....................................................
+ $ tox
+ # [..]
+ py26: commands succeeded
+ py27: commands succeeded
+ py32: commands succeeded
+ py33: commands succeeded
- Success! You could also ``pip install coverage`` to get code coverage reporting.
+ Note that code coverage less than 100% is regarded as a failing run.
Contributing
@@ -117,6 +121,54 @@ Description: =======
Changes
=======
+ 1.7.1 (2013-09-25)
+ ++++++++++++++++++
+
+ * Added granular timeout support with new `urllib3.util.Timeout` class.
+ (Issue #231)
+
+ * Fixed Python 3.4 support. (Issue #238)
+
+
+ 1.7 (2013-08-14)
+ ++++++++++++++++
+
+ * More exceptions are now pickle-able, with tests. (Issue #174)
+
+ * Fixed redirecting with relative URLs in Location header. (Issue #178)
+
+ * Support for relative urls in ``Location: ...`` header. (Issue #179)
+
+ * ``urllib3.response.HTTPResponse`` now inherits from ``io.IOBase`` for bonus
+ file-like functionality. (Issue #187)
+
+ * Passing ``assert_hostname=False`` when creating a HTTPSConnectionPool will
+ skip hostname verification for SSL connections. (Issue #194)
+
+ * New method ``urllib3.response.HTTPResponse.stream(...)`` which acts as a
+ generator wrapped around ``.read(...)``. (Issue #198)
+
+ * IPv6 url parsing enforces brackets around the hostname. (Issue #199)
+
+ * Fixed thread race condition in
+ ``urllib3.poolmanager.PoolManager.connection_from_host(...)`` (Issue #204)
+
+ * ``ProxyManager`` requests now include non-default port in ``Host: ...``
+ header. (Issue #217)
+
+ * Added HTTPS proxy support in ``ProxyManager``. (Issue #170 #139)
+
+ * New ``RequestField`` object can be passed to the ``fields=...`` param which
+ can specify headers. (Issue #220)
+
+ * Raise ``urllib3.exceptions.ProxyError`` when connecting to proxy fails.
+ (Issue #221)
+
+ * Use international headers when posting file names. (Issue #119)
+
+ * Improved IPv6 support. (Issue #203)
+
+
1.6 (2013-04-25)
++++++++++++++++
diff --git a/README.rst b/README.rst
index 75f05d8..b126647 100644
--- a/README.rst
+++ b/README.rst
@@ -17,7 +17,7 @@ Highlights
- Supports gzip and deflate decoding.
- Thread-safe and sanity-safe.
- Works with AppEngine, gevent, and eventlib.
-- Tested on Python 2.6+ and Python 3.3+, 100% unit test coverage.
+- Tested on Python 2.6+ and Python 3.2+, 100% unit test coverage.
- Small and easy to understand codebase perfect for extending and building upon.
For a more comprehensive solution, have a look at
`Requests <http://python-requests.org/>`_ which is also powered by urllib3.
@@ -81,14 +81,18 @@ pools, you should look at
Run the tests
=============
-We use some external dependencies to run the urllib3 test suite. Easiest way to
-run the tests is thusly from the urllib3 source root: ::
+We use some external dependencies, multiple interpreters and code coverage
+analysis while running test suite. Easiest way to run the tests is thusly the
+``tox`` utility: ::
- $ pip install -r test-requirements.txt
- $ nosetests
- .....................................................
+ $ tox
+ # [..]
+ py26: commands succeeded
+ py27: commands succeeded
+ py32: commands succeeded
+ py33: commands succeeded
-Success! You could also ``pip install coverage`` to get code coverage reporting.
+Note that code coverage less than 100% is regarded as a failing run.
Contributing
diff --git a/dummyserver/handlers.py b/dummyserver/handlers.py
index ab48b53..bc51f31 100644
--- a/dummyserver/handlers.py
+++ b/dummyserver/handlers.py
@@ -87,7 +87,7 @@ class TestingApp(WSGIHandler):
if request.method != method:
return Response("Wrong method: %s != %s" %
- (method, request.method), status='400')
+ (method, request.method), status='400 Bad Request')
return Response()
def upload(self, request):
@@ -100,17 +100,18 @@ class TestingApp(WSGIHandler):
if len(files_) != 1:
return Response("Expected 1 file for '%s', not %d" %(param, len(files_)),
- status='400')
+ status='400 Bad Request')
file_ = files_[0]
data = file_['body']
if int(size) != len(data):
return Response("Wrong size: %d != %d" %
- (size, len(data)), status='400')
+ (size, len(data)), status='400 Bad Request')
if filename != file_['filename']:
return Response("Wrong filename: %s != %s" %
- (filename, file_.filename), status='400')
+ (filename, file_.filename),
+ status='400 Bad Request')
return Response()
@@ -118,7 +119,7 @@ class TestingApp(WSGIHandler):
"Perform a redirect to ``target``"
target = request.params.get('target', '/')
headers = [('Location', target)]
- return Response(status='303', headers=headers)
+ return Response(status='303 See Other', headers=headers)
def keepalive(self, request):
if request.params.get('close', b'0') == b'1':
@@ -169,3 +170,49 @@ class TestingApp(WSGIHandler):
def shutdown(self, request):
sys.exit()
+
+
+# RFC2231-aware replacement of internal tornado function
+def _parse_header(line):
+ r"""Parse a Content-type like header.
+
+ Return the main content-type and a dictionary of options.
+
+ >>> d = _parse_header("CD: fd; foo=\"bar\"; file*=utf-8''T%C3%A4st")[1]
+ >>> d['file'] == 'T\u00e4st'
+ True
+ >>> d['foo']
+ 'bar'
+ """
+ import tornado.httputil
+ import email.utils
+ from urllib3.packages import six
+ if not six.PY3:
+ line = line.encode('utf-8')
+ parts = tornado.httputil._parseparam(';' + line)
+ key = next(parts)
+ # decode_params treats first argument special, but we already stripped key
+ params = [('Dummy', 'value')]
+ for p in parts:
+ i = p.find('=')
+ if i >= 0:
+ name = p[:i].strip().lower()
+ value = p[i + 1:].strip()
+ params.append((name, value))
+ params = email.utils.decode_params(params)
+ params.pop(0) # get rid of the dummy again
+ pdict = {}
+ for name, value in params:
+ print(repr(value))
+ value = email.utils.collapse_rfc2231_value(value)
+ if len(value) >= 2 and value[0] == '"' and value[-1] == '"':
+ value = value[1:-1]
+ pdict[name] = value
+ return key, pdict
+
+# TODO: make the following conditional as soon as we know a version
+# which does not require this fix.
+# See https://github.com/facebook/tornado/issues/868
+if True:
+ import tornado.httputil
+ tornado.httputil._parse_header = _parse_header
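
A quick illustration of what the monkey-patched parser handles that stock
tornado 2.4 did not; the header value is made up for the example, and this
assumes ``dummyserver`` is importable from the source root: ::

    from dummyserver.handlers import _parse_header

    # RFC 2231 percent-escaped, charset-tagged parameter (note the '*'
    # suffix on the parameter name), as in the doctest above.
    key, params = _parse_header(
        "form-data; name=upload; filename*=utf-8''T%C3%A4st.txt")
    assert key == 'form-data'
    assert params['filename'] == u'T\u00e4st.txt'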
diff --git a/dummyserver/proxy.py b/dummyserver/proxy.py
new file mode 100755
index 0000000..aca92a7
--- /dev/null
+++ b/dummyserver/proxy.py
@@ -0,0 +1,137 @@
+#!/usr/bin/env python
+#
+# Simple asynchronous HTTP proxy with tunnelling (CONNECT).
+#
+# GET/POST proxying based on
+# http://groups.google.com/group/python-tornado/msg/7bea08e7a049cf26
+#
+# Copyright (C) 2012 Senko Rasic <senko.rasic@dobarkod.hr>
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+import sys
+import socket
+
+import tornado.httpserver
+import tornado.ioloop
+import tornado.iostream
+import tornado.web
+import tornado.httpclient
+
+__all__ = ['ProxyHandler', 'run_proxy']
+
+
+class ProxyHandler(tornado.web.RequestHandler):
+ SUPPORTED_METHODS = ['GET', 'POST', 'CONNECT']
+
+ @tornado.web.asynchronous
+ def get(self):
+
+ def handle_response(response):
+ if response.error and not isinstance(response.error,
+ tornado.httpclient.HTTPError):
+ self.set_status(500)
+ self.write('Internal server error:\n' + str(response.error))
+ self.finish()
+ else:
+ self.set_status(response.code)
+ for header in ('Date', 'Cache-Control', 'Server',
+ 'Content-Type', 'Location'):
+ v = response.headers.get(header)
+ if v:
+ self.set_header(header, v)
+ if response.body:
+ self.write(response.body)
+ self.finish()
+
+ req = tornado.httpclient.HTTPRequest(url=self.request.uri,
+ method=self.request.method, body=self.request.body,
+ headers=self.request.headers, follow_redirects=False,
+ allow_nonstandard_methods=True)
+
+ client = tornado.httpclient.AsyncHTTPClient()
+ try:
+ client.fetch(req, handle_response)
+ except tornado.httpclient.HTTPError as e:
+ if hasattr(e, 'response') and e.response:
+ self.handle_response(e.response)
+ else:
+ self.set_status(500)
+ self.write('Internal server error:\n' + str(e))
+ self.finish()
+
+ @tornado.web.asynchronous
+ def post(self):
+ return self.get()
+
+ @tornado.web.asynchronous
+ def connect(self):
+ host, port = self.request.uri.split(':')
+ client = self.request.connection.stream
+
+ def read_from_client(data):
+ upstream.write(data)
+
+ def read_from_upstream(data):
+ client.write(data)
+
+ def client_close(data=None):
+ if upstream.closed():
+ return
+ if data:
+ upstream.write(data)
+ upstream.close()
+
+ def upstream_close(data=None):
+ if client.closed():
+ return
+ if data:
+ client.write(data)
+ client.close()
+
+ def start_tunnel():
+ client.read_until_close(client_close, read_from_client)
+ upstream.read_until_close(upstream_close, read_from_upstream)
+ client.write(b'HTTP/1.0 200 Connection established\r\n\r\n')
+
+ s = socket.socket(socket.AF_INET, socket.SOCK_STREAM, 0)
+ upstream = tornado.iostream.IOStream(s)
+ upstream.connect((host, int(port)), start_tunnel)
+
+
+def run_proxy(port, start_ioloop=True):
+ """
+ Run proxy on the specified port. If start_ioloop is True (default),
+ the tornado IOLoop will be started immediately.
+ """
+ app = tornado.web.Application([
+ (r'.*', ProxyHandler),
+ ])
+ app.listen(port)
+ ioloop = tornado.ioloop.IOLoop.instance()
+ if start_ioloop:
+ ioloop.start()
+
+if __name__ == '__main__':
+ port = 8888
+ if len(sys.argv) > 1:
+ port = int(sys.argv[1])
+
+ print ("Starting HTTP proxy on port %d" % port)
+ run_proxy(port)
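
Assuming the script has been started as shown at the bottom of the file
(e.g. ``python dummyserver/proxy.py 8888``), a urllib3 client can be pointed
at it like so; the proxy port and target URL are illustrative: ::

    import urllib3

    # Route plain HTTP requests through the local dummy proxy.
    proxy = urllib3.proxy_from_url('http://localhost:8888/')
    r = proxy.request('GET', 'http://example.com/')
    print(r.status)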
diff --git a/dummyserver/server.py b/dummyserver/server.py
index 9031664..f4f98a4 100755
--- a/dummyserver/server.py
+++ b/dummyserver/server.py
@@ -11,11 +11,14 @@ import sys
import threading
import socket
+from tornado import netutil
import tornado.wsgi
import tornado.httpserver
import tornado.ioloop
+import tornado.web
from dummyserver.handlers import TestingApp
+from dummyserver.proxy import ProxyHandler
log = logging.getLogger(__name__)
@@ -36,28 +39,29 @@ class SocketServerThread(threading.Thread):
"""
:param socket_handler: Callable which receives a socket argument for one
request.
- :param ready_lock: Lock which gets released when the socket handler is
+ :param ready_event: Event which gets set when the socket handler is
ready to receive requests.
"""
def __init__(self, socket_handler, host='localhost', port=8081,
- ready_lock=None):
+ ready_event=None):
threading.Thread.__init__(self)
self.socket_handler = socket_handler
self.host = host
- self.port = port
- self.ready_lock = ready_lock
+ self.ready_event = ready_event
def _start_server(self):
sock = socket.socket()
- sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
- sock.bind((self.host, self.port))
+ if sys.platform != 'win32':
+ sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
+ sock.bind((self.host, 0))
+ self.port = sock.getsockname()[1]
# Once listen() returns, the server socket is ready
sock.listen(1)
- if self.ready_lock:
- self.ready_lock.release()
+ if self.ready_event:
+ self.ready_event.set()
self.socket_handler(sock)
sock.close()
@@ -67,34 +71,44 @@ class SocketServerThread(threading.Thread):
class TornadoServerThread(threading.Thread):
- def __init__(self, host='localhost', port=8081, scheme='http', certs=None):
+ app = tornado.wsgi.WSGIContainer(TestingApp())
+
+ def __init__(self, host='localhost', scheme='http', certs=None,
+ ready_event=None):
threading.Thread.__init__(self)
self.host = host
- self.port = port
self.scheme = scheme
self.certs = certs
+ self.ready_event = ready_event
def _start_server(self):
- container = tornado.wsgi.WSGIContainer(TestingApp())
-
if self.scheme == 'https':
- http_server = tornado.httpserver.HTTPServer(container,
+ http_server = tornado.httpserver.HTTPServer(self.app,
ssl_options=self.certs)
else:
- http_server = tornado.httpserver.HTTPServer(container)
+ http_server = tornado.httpserver.HTTPServer(self.app)
- http_server.listen(self.port, address=self.host)
+ family = socket.AF_INET6 if ':' in self.host else socket.AF_INET
+ sock, = netutil.bind_sockets(None, address=self.host, family=family)
+ self.port = sock.getsockname()[1]
+ http_server.add_sockets([sock])
return http_server
def run(self):
- self.server = self._start_server()
self.ioloop = tornado.ioloop.IOLoop.instance()
+ self.server = self._start_server()
+ if self.ready_event:
+ self.ready_event.set()
self.ioloop.start()
def stop(self):
- self.server.stop()
- self.ioloop.stop()
+ self.ioloop.add_callback(self.server.stop)
+ self.ioloop.add_callback(self.ioloop.stop)
+
+
+class ProxyServerThread(TornadoServerThread):
+ app = tornado.web.Application([(r'.*', ProxyHandler)])
if __name__ == '__main__':
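
The port-0 trick adopted in ``_start_server`` above, in isolation: binding to
port 0 asks the kernel for a free ephemeral port, which is then read back
with ``getsockname()``. This is a standard-library pattern, not urllib3 API: ::

    import socket

    sock = socket.socket()
    sock.bind(('localhost', 0))    # 0 = let the OS pick a free port
    port = sock.getsockname()[1]   # recover the assigned port number
    sock.listen(1)
    print('listening on', port)
    sock.close()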
diff --git a/dummyserver/testcase.py b/dummyserver/testcase.py
index 73b8f2f..a2a1da1 100644
--- a/dummyserver/testcase.py
+++ b/dummyserver/testcase.py
@@ -1,14 +1,15 @@
import unittest
-
-from threading import Lock
+import socket
+import threading
+from nose.plugins.skip import SkipTest
from dummyserver.server import (
TornadoServerThread, SocketServerThread,
DEFAULT_CERTS,
+ ProxyServerThread,
)
-
-# TODO: Change ports to auto-allocated?
+has_ipv6 = hasattr(socket, 'has_ipv6')
class SocketDummyServerTestCase(unittest.TestCase):
@@ -18,19 +19,16 @@ class SocketDummyServerTestCase(unittest.TestCase):
"""
scheme = 'http'
host = 'localhost'
- port = 18080
@classmethod
def _start_server(cls, socket_handler):
- ready_lock = Lock()
- ready_lock.acquire()
+ ready_event = threading.Event()
cls.server_thread = SocketServerThread(socket_handler=socket_handler,
- ready_lock=ready_lock,
- host=cls.host, port=cls.port)
+ ready_event=ready_event,
+ host=cls.host)
cls.server_thread.start()
-
- # Lock gets released by thread above
- ready_lock.acquire()
+ ready_event.wait()
+ cls.port = cls.server_thread.port
@classmethod
def tearDownClass(cls):
@@ -41,20 +39,19 @@ class SocketDummyServerTestCase(unittest.TestCase):
class HTTPDummyServerTestCase(unittest.TestCase):
scheme = 'http'
host = 'localhost'
- host_alt = '127.0.0.1' # Some tests need two hosts
- port = 18081
+ host_alt = '127.0.0.1' # Some tests need two hosts
certs = DEFAULT_CERTS
@classmethod
def _start_server(cls):
- cls.server_thread = TornadoServerThread(host=cls.host, port=cls.port,
+ ready_event = threading.Event()
+ cls.server_thread = TornadoServerThread(host=cls.host,
scheme=cls.scheme,
- certs=cls.certs)
+ certs=cls.certs,
+ ready_event=ready_event)
cls.server_thread.start()
-
- # TODO: Loop-check here instead
- import time
- time.sleep(0.1)
+ ready_event.wait()
+ cls.port = cls.server_thread.port
@classmethod
def _stop_server(cls):
@@ -73,5 +70,52 @@ class HTTPDummyServerTestCase(unittest.TestCase):
class HTTPSDummyServerTestCase(HTTPDummyServerTestCase):
scheme = 'https'
host = 'localhost'
- port = 18082
certs = DEFAULT_CERTS
+
+
+class HTTPDummyProxyTestCase(unittest.TestCase):
+
+ http_host = 'localhost'
+ http_host_alt = '127.0.0.1'
+
+ https_host = 'localhost'
+ https_host_alt = '127.0.0.1'
+ https_certs = DEFAULT_CERTS
+
+ proxy_host = 'localhost'
+ proxy_host_alt = '127.0.0.1'
+
+ @classmethod
+ def setUpClass(cls):
+ cls.http_thread = TornadoServerThread(host=cls.http_host,
+ scheme='http')
+ cls.http_thread._start_server()
+ cls.http_port = cls.http_thread.port
+
+ cls.https_thread = TornadoServerThread(
+ host=cls.https_host, scheme='https', certs=cls.https_certs)
+ cls.https_thread._start_server()
+ cls.https_port = cls.https_thread.port
+
+ ready_event = threading.Event()
+ cls.proxy_thread = ProxyServerThread(
+ host=cls.proxy_host, ready_event=ready_event)
+ cls.proxy_thread.start()
+ ready_event.wait()
+ cls.proxy_port = cls.proxy_thread.port
+
+ @classmethod
+ def tearDownClass(cls):
+ cls.proxy_thread.stop()
+ cls.proxy_thread.join()
+
+
+class IPv6HTTPDummyServerTestCase(HTTPDummyServerTestCase):
+ host = '::1'
+
+ @classmethod
+ def setUpClass(cls):
+ if not has_ipv6:
+ raise SkipTest('IPv6 not available')
+ else:
+ super(IPv6HTTPDummyServerTestCase, cls).setUpClass()
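
The Lock-to-Event switch used throughout these test cases is the usual
Event-based readiness handshake between threads; a self-contained sketch
(names are illustrative): ::

    import threading

    def serve(ready_event):
        # ... bind sockets, allocate the port, etc. ...
        ready_event.set()   # signal the main thread that setup is done
        # ... accept connections ...

    ready = threading.Event()
    t = threading.Thread(target=serve, args=(ready,))
    t.start()
    ready.wait()            # blocks until serve() calls set()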
diff --git a/setup.cfg b/setup.cfg
index 8f6983c..8f1fee7 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -3,6 +3,7 @@ logging-clear-handlers = true
with-coverage = true
cover-package = urllib3
cover-min-percentage = 100
+cover-erase = true
[egg_info]
tag_build =
diff --git a/test-requirements.txt b/test-requirements.txt
index 226c13d..f7c3a50 100644
--- a/test-requirements.txt
+++ b/test-requirements.txt
@@ -1,3 +1,4 @@
-nose
+nose==1.3
+mock==1.0.1
tornado==2.4.1
-coverage
+coverage==3.6
diff --git a/test/__init__.py b/test/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/__init__.py
diff --git a/test/benchmark.py b/test/benchmark.py
new file mode 100644
index 0000000..e7049c4
--- /dev/null
+++ b/test/benchmark.py
@@ -0,0 +1,77 @@
+#!/usr/bin/env python
+
+"""
+Really simple rudimentary benchmark to compare ConnectionPool versus standard
+urllib to demonstrate the usefulness of connection re-using.
+"""
+from __future__ import print_function
+
+import sys
+import time
+import urllib
+
+sys.path.append('../')
+import urllib3
+
+
+# URLs to download. Doesn't matter as long as they're from the same host, so we
+# can take advantage of connection re-using.
+TO_DOWNLOAD = [
+ 'http://code.google.com/apis/apps/',
+ 'http://code.google.com/apis/base/',
+ 'http://code.google.com/apis/blogger/',
+ 'http://code.google.com/apis/calendar/',
+ 'http://code.google.com/apis/codesearch/',
+ 'http://code.google.com/apis/contact/',
+ 'http://code.google.com/apis/books/',
+ 'http://code.google.com/apis/documents/',
+ 'http://code.google.com/apis/finance/',
+ 'http://code.google.com/apis/health/',
+ 'http://code.google.com/apis/notebook/',
+ 'http://code.google.com/apis/picasaweb/',
+ 'http://code.google.com/apis/spreadsheets/',
+ 'http://code.google.com/apis/webmastertools/',
+ 'http://code.google.com/apis/youtube/',
+]
+
+
+def urllib_get(url_list):
+ assert url_list
+ for url in url_list:
+ now = time.time()
+ r = urllib.urlopen(url)
+ elapsed = time.time() - now
+ print("Got in %0.3f: %s" % (elapsed, url))
+
+
+def pool_get(url_list):
+ assert url_list
+ pool = urllib3.connection_from_url(url_list[0])
+ for url in url_list:
+ now = time.time()
+ r = pool.get_url(url)
+ elapsed = time.time() - now
+ print("Got in %0.3fs: %s" % (elapsed, url))
+
+
+if __name__ == '__main__':
+ print("Running pool_get ...")
+ now = time.time()
+ pool_get(TO_DOWNLOAD)
+ pool_elapsed = time.time() - now
+
+ print("Running urllib_get ...")
+ now = time.time()
+ urllib_get(TO_DOWNLOAD)
+ urllib_elapsed = time.time() - now
+
+ print("Completed pool_get in %0.3fs" % pool_elapsed)
+ print("Completed urllib_get in %0.3fs" % urllib_elapsed)
+
+
+"""
+Example results:
+
+Completed pool_get in 1.163s
+Completed urllib_get in 2.318s
+"""
diff --git a/test/test_connectionpool.py b/test/test_connectionpool.py
index a7e104a..ac1768e 100644
--- a/test/test_connectionpool.py
+++ b/test/test_connectionpool.py
@@ -1,6 +1,11 @@
import unittest
-from urllib3.connectionpool import connection_from_url, HTTPConnectionPool
+from urllib3.connectionpool import (
+ connection_from_url,
+ HTTPConnection,
+ HTTPConnectionPool,
+)
+from urllib3.util import Timeout
from urllib3.packages.ssl_match_hostname import CertificateError
from urllib3.exceptions import (
ClosedPoolError,
@@ -8,7 +13,7 @@ from urllib3.exceptions import (
HostChangedError,
MaxRetryError,
SSLError,
- TimeoutError,
+ ReadTimeoutError,
)
from socket import error as SocketError, timeout as SocketTimeout
@@ -52,6 +57,7 @@ class TestConnectionPool(unittest.TestCase):
c = connection_from_url(a)
self.assertFalse(c.is_same_host(b), "%s =? %s" % (a, b))
+
def test_max_connections(self):
pool = HTTPConnectionPool(host='localhost', maxsize=1, block=True)
@@ -108,6 +114,7 @@ class TestConnectionPool(unittest.TestCase):
"Max retries exceeded with url: Test. "
"(Caused by {0}: Test)".format(str(err.__class__)))
+
def test_pool_size(self):
POOL_SIZE = 1
pool = HTTPConnectionPool(host='localhost', maxsize=POOL_SIZE, block=True)
@@ -122,8 +129,8 @@ class TestConnectionPool(unittest.TestCase):
self.assertEqual(pool.pool.qsize(), POOL_SIZE)
#make sure that all of the exceptions return the connection to the pool
- _test(Empty, TimeoutError)
- _test(SocketTimeout, TimeoutError)
+ _test(Empty, ReadTimeoutError)
+ _test(SocketTimeout, ReadTimeoutError)
_test(BaseSSLError, SSLError)
_test(CertificateError, SSLError)
@@ -166,6 +173,20 @@ class TestConnectionPool(unittest.TestCase):
self.assertRaises(Empty, old_pool_queue.get, block=False)
+ def test_pool_timeouts(self):
+ pool = HTTPConnectionPool(host='localhost')
+ conn = pool._new_conn()
+ self.assertEqual(conn.__class__, HTTPConnection)
+ self.assertEqual(pool.timeout.__class__, Timeout)
+ self.assertEqual(pool.timeout._read, Timeout.DEFAULT_TIMEOUT)
+ self.assertEqual(pool.timeout._connect, Timeout.DEFAULT_TIMEOUT)
+ self.assertEqual(pool.timeout.total, None)
+
+ pool = HTTPConnectionPool(host='localhost', timeout=3)
+ self.assertEqual(pool.timeout._read, 3)
+ self.assertEqual(pool.timeout._connect, 3)
+ self.assertEqual(pool.timeout.total, None)
+
if __name__ == '__main__':
unittest.main()
diff --git a/test/test_exceptions.py b/test/test_exceptions.py
index 3e02ca6..e20649b 100644
--- a/test/test_exceptions.py
+++ b/test/test_exceptions.py
@@ -1,19 +1,35 @@
import unittest
import pickle
-from urllib3.exceptions import HTTPError, MaxRetryError, LocationParseError
+from urllib3.exceptions import (HTTPError, MaxRetryError, LocationParseError,
+ ClosedPoolError, EmptyPoolError,
+ HostChangedError, ReadTimeoutError,
+ ConnectTimeoutError)
from urllib3.connectionpool import HTTPConnectionPool
class TestPickle(unittest.TestCase):
+ def cycle(self, item):
+ return pickle.loads(pickle.dumps(item))
+
def test_exceptions(self):
- assert pickle.dumps(HTTPError(None))
- assert pickle.dumps(MaxRetryError(None, None, None))
- assert pickle.dumps(LocationParseError(None))
+ assert self.cycle(HTTPError(None))
+ assert self.cycle(MaxRetryError(None, None, None))
+ assert self.cycle(LocationParseError(None))
+ assert self.cycle(ConnectTimeoutError(None))
def test_exceptions_with_objects(self):
- assert pickle.dumps(HTTPError('foo'))
- assert pickle.dumps(MaxRetryError(HTTPConnectionPool('localhost'), '/', None))
- assert pickle.dumps(LocationParseError('fake location'))
+ assert self.cycle(HTTPError('foo'))
+ assert self.cycle(MaxRetryError(HTTPConnectionPool('localhost'),
+ '/', None))
+ assert self.cycle(LocationParseError('fake location'))
+ assert self.cycle(ClosedPoolError(HTTPConnectionPool('localhost'),
+ None))
+ assert self.cycle(EmptyPoolError(HTTPConnectionPool('localhost'),
+ None))
+ assert self.cycle(HostChangedError(HTTPConnectionPool('localhost'),
+ '/', None))
+ assert self.cycle(ReadTimeoutError(HTTPConnectionPool('localhost'),
+ '/', None))
diff --git a/test/test_fields.py b/test/test_fields.py
new file mode 100644
index 0000000..888c2d5
--- /dev/null
+++ b/test/test_fields.py
@@ -0,0 +1,44 @@
+import unittest
+
+from urllib3.fields import guess_content_type, RequestField
+from urllib3.packages.six import b, u
+
+
+class TestRequestField(unittest.TestCase):
+
+ def test_guess_content_type(self):
+ self.assertEqual(guess_content_type('image.jpg'), 'image/jpeg')
+ self.assertEqual(guess_content_type('notsure'), 'application/octet-stream')
+ self.assertEqual(guess_content_type(None), 'application/octet-stream')
+
+ def test_create(self):
+ simple_field = RequestField('somename', 'data')
+ self.assertEqual(simple_field.render_headers(), '\r\n')
+ filename_field = RequestField('somename', 'data', filename='somefile.txt')
+ self.assertEqual(filename_field.render_headers(), '\r\n')
+ headers_field = RequestField('somename', 'data', headers={'Content-Length': 4})
+ self.assertEqual(headers_field.render_headers(),
+ 'Content-Length: 4\r\n'
+ '\r\n')
+
+ def test_make_multipart(self):
+ field = RequestField('somename', 'data')
+ field.make_multipart(content_type='image/jpg', content_location='/test')
+ self.assertEqual(field.render_headers(),
+ 'Content-Disposition: form-data; name="somename"\r\n'
+ 'Content-Type: image/jpg\r\n'
+ 'Content-Location: /test\r\n'
+ '\r\n')
+
+ def test_render_parts(self):
+ field = RequestField('somename', 'data')
+ parts = field._render_parts({'name': 'value', 'filename': 'value'})
+ self.assertTrue('name="value"' in parts)
+ self.assertTrue('filename="value"' in parts)
+ parts = field._render_parts([('name', 'value'), ('filename', 'value')])
+ self.assertEqual(parts, 'name="value"; filename="value"')
+
+ def test_render_part(self):
+ field = RequestField('somename', 'data')
+ param = field._render_part('filename', u('n\u00e4me'))
+ self.assertEqual(param, "filename*=utf-8''n%C3%A4me")
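
Tying the ``RequestField`` pieces tested above together with
``encode_multipart_formdata``, a rough sketch; the field name, filename, and
payload are illustrative: ::

    from urllib3.fields import RequestField
    from urllib3.filepost import encode_multipart_formdata

    # Build one multipart part with an explicit content type.
    field = RequestField('attachment', b'binary data', filename='photo.jpg')
    field.make_multipart(content_type='image/jpeg')

    body, content_type = encode_multipart_formdata([field])
    print(content_type)   # multipart/form-data; boundary=...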
diff --git a/test/test_filepost.py b/test/test_filepost.py
index 70ab100..ca33d61 100644
--- a/test/test_filepost.py
+++ b/test/test_filepost.py
@@ -1,6 +1,7 @@
import unittest
from urllib3.filepost import encode_multipart_formdata, iter_fields
+from urllib3.fields import RequestField
from urllib3.packages.six import b, u
@@ -117,3 +118,16 @@ class TestMultipartEncoding(unittest.TestCase):
self.assertEqual(content_type,
'multipart/form-data; boundary=' + str(BOUNDARY))
+
+ def test_request_fields(self):
+ fields = [RequestField('k', b'v', filename='somefile.txt', headers={'Content-Type': 'image/jpeg'})]
+
+ encoded, content_type = encode_multipart_formdata(fields, boundary=BOUNDARY)
+
+ self.assertEquals(encoded,
+ b'--' + b(BOUNDARY) + b'\r\n'
+ b'Content-Type: image/jpeg\r\n'
+ b'\r\n'
+ b'v\r\n'
+ b'--' + b(BOUNDARY) + b'--\r\n'
+ )
diff --git a/test/test_proxymanager.py b/test/test_proxymanager.py
index 64c86e8..e7b5c48 100644
--- a/test/test_proxymanager.py
+++ b/test/test_proxymanager.py
@@ -5,7 +5,7 @@ from urllib3.poolmanager import ProxyManager
class TestProxyManager(unittest.TestCase):
def test_proxy_headers(self):
- p = ProxyManager(None)
+ p = ProxyManager('http://something:1234')
url = 'http://pypi.python.org/test'
# Verify default headers
@@ -23,5 +23,21 @@ class TestProxyManager(unittest.TestCase):
self.assertEqual(headers, provided_headers)
+ # Verify proxy with nonstandard port
+ provided_headers = {'Accept': 'application/json'}
+ expected_headers = provided_headers.copy()
+ expected_headers.update({'Host': 'pypi.python.org:8080'})
+ url_with_port = 'http://pypi.python.org:8080/test'
+ headers = p._set_proxy_headers(url_with_port, provided_headers)
+
+ self.assertEqual(headers, expected_headers)
+
+ def test_default_port(self):
+ p = ProxyManager('http://something')
+ self.assertEqual(p.proxy.port, 80)
+ p = ProxyManager('https://something')
+ self.assertEqual(p.proxy.port, 443)
+
+
if __name__ == '__main__':
unittest.main()
diff --git a/test/test_response.py b/test/test_response.py
index 199e379..90d34eb 100644
--- a/test/test_response.py
+++ b/test/test_response.py
@@ -1,6 +1,6 @@
import unittest
-from io import BytesIO
+from io import BytesIO, BufferedReader
from urllib3.response import HTTPResponse
from urllib3.exceptions import DecodeError
@@ -112,5 +112,138 @@ class TestResponse(unittest.TestCase):
self.assertEqual(r.read(1), b'f')
self.assertEqual(r.read(2), b'oo')
+ def test_io(self):
+ import socket
+ try:
+ from http.client import HTTPResponse as OldHTTPResponse
+ except:
+ from httplib import HTTPResponse as OldHTTPResponse
+
+ fp = BytesIO(b'foo')
+ resp = HTTPResponse(fp, preload_content=False)
+
+ self.assertEqual(resp.closed, False)
+ self.assertEqual(resp.readable(), True)
+ self.assertEqual(resp.writable(), False)
+ self.assertRaises(IOError, resp.fileno)
+
+ resp.close()
+ self.assertEqual(resp.closed, True)
+
+ # Try closing with an `httplib.HTTPResponse`, because it has an
+ # `isclosed` method.
+ hlr = OldHTTPResponse(socket.socket())
+ resp2 = HTTPResponse(hlr, preload_content=False)
+ self.assertEqual(resp2.closed, False)
+ resp2.close()
+ self.assertEqual(resp2.closed, True)
+
+ #also try when only data is present.
+ resp3 = HTTPResponse('foodata')
+ self.assertRaises(IOError, resp3.fileno)
+
+ resp3._fp = 2
+ # A corner case where _fp is present but doesn't have `closed`,
+ # `isclosed`, or `fileno`. Unlikely, but possible.
+ self.assertEqual(resp3.closed, True)
+ self.assertRaises(IOError, resp3.fileno)
+
+ def test_io_bufferedreader(self):
+ fp = BytesIO(b'foo')
+ resp = HTTPResponse(fp, preload_content=False)
+ br = BufferedReader(resp)
+
+ self.assertEqual(br.read(), b'foo')
+
+ br.close()
+ self.assertEqual(resp.closed, True)
+
+ def test_streaming(self):
+ fp = BytesIO(b'foo')
+ resp = HTTPResponse(fp, preload_content=False)
+ stream = resp.stream(2, decode_content=False)
+
+ self.assertEqual(next(stream), b'fo')
+ self.assertEqual(next(stream), b'o')
+ self.assertRaises(StopIteration, next, stream)
+
+ def test_gzipped_streaming(self):
+ import zlib
+ compress = zlib.compressobj(6, zlib.DEFLATED, 16 + zlib.MAX_WBITS)
+ data = compress.compress(b'foo')
+ data += compress.flush()
+
+ fp = BytesIO(data)
+ resp = HTTPResponse(fp, headers={'content-encoding': 'gzip'},
+ preload_content=False)
+ stream = resp.stream(2)
+
+ self.assertEqual(next(stream), b'f')
+ self.assertEqual(next(stream), b'oo')
+ self.assertRaises(StopIteration, next, stream)
+
+ def test_deflate_streaming(self):
+ import zlib
+ data = zlib.compress(b'foo')
+
+ fp = BytesIO(data)
+ resp = HTTPResponse(fp, headers={'content-encoding': 'deflate'},
+ preload_content=False)
+ stream = resp.stream(2)
+
+ self.assertEqual(next(stream), b'f')
+ self.assertEqual(next(stream), b'oo')
+ self.assertRaises(StopIteration, next, stream)
+
+ def test_deflate2_streaming(self):
+ import zlib
+ compress = zlib.compressobj(6, zlib.DEFLATED, -zlib.MAX_WBITS)
+ data = compress.compress(b'foo')
+ data += compress.flush()
+
+ fp = BytesIO(data)
+ resp = HTTPResponse(fp, headers={'content-encoding': 'deflate'},
+ preload_content=False)
+ stream = resp.stream(2)
+
+ self.assertEqual(next(stream), b'f')
+ self.assertEqual(next(stream), b'oo')
+ self.assertRaises(StopIteration, next, stream)
+
+ def test_empty_stream(self):
+ fp = BytesIO(b'')
+ resp = HTTPResponse(fp, preload_content=False)
+ stream = resp.stream(2, decode_content=False)
+
+ self.assertRaises(StopIteration, next, stream)
+
+ def test_mock_httpresponse_stream(self):
+ # Mock out a HTTP Request that does enough to make it through urllib3's
+ # read() and close() calls, and also exhausts and underlying file
+ # object.
+ class MockHTTPRequest(object):
+ self.fp = None
+
+ def read(self, amt):
+ data = self.fp.read(amt)
+ if not data:
+ self.fp = None
+
+ return data
+
+ def close(self):
+ self.fp = None
+
+ bio = BytesIO(b'foo')
+ fp = MockHTTPRequest()
+ fp.fp = bio
+ resp = HTTPResponse(fp, preload_content=False)
+ stream = resp.stream(2)
+
+ self.assertEqual(next(stream), b'fo')
+ self.assertEqual(next(stream), b'o')
+ self.assertRaises(StopIteration, next, stream)
+
+
if __name__ == '__main__':
unittest.main()
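
For orientation, the ``stream()`` method exercised above looks like this from
the caller's side; the URL is illustrative: ::

    import urllib3

    pool = urllib3.PoolManager()
    r = pool.request('GET', 'http://example.com/', preload_content=False)

    # Yields decoded chunks of up to 1024 bytes until the body is exhausted.
    for chunk in r.stream(1024):
        print(len(chunk))
    r.release_conn()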
diff --git a/test/test_util.py b/test/test_util.py
index a989da6..b465fef 100644
--- a/test/test_util.py
+++ b/test/test_util.py
@@ -1,10 +1,23 @@
-import unittest
import logging
+import unittest
+
+from mock import patch
from urllib3 import add_stderr_logger
-from urllib3.util import get_host, make_headers, split_first, parse_url, Url
-from urllib3.exceptions import LocationParseError
+from urllib3.util import (
+ get_host,
+ make_headers,
+ split_first,
+ parse_url,
+ Timeout,
+ Url,
+)
+from urllib3.exceptions import LocationParseError, TimeoutStateError
+# This number represents a time in seconds, it doesn't mean anything in
+# isolation. Setting to a high-ish value to avoid conflicts with the smaller
+# numbers used for timeouts
+TIMEOUT_EPOCH = 1000
class TestUtil(unittest.TestCase):
def test_get_host(self):
@@ -34,20 +47,20 @@ class TestUtil(unittest.TestCase):
'http://173.194.35.7:80/test': ('http', '173.194.35.7', 80),
# IPv6
- '[2a00:1450:4001:c01::67]': ('http', '2a00:1450:4001:c01::67', None),
- 'http://[2a00:1450:4001:c01::67]': ('http', '2a00:1450:4001:c01::67', None),
- 'http://[2a00:1450:4001:c01::67]/test': ('http', '2a00:1450:4001:c01::67', None),
- 'http://[2a00:1450:4001:c01::67]:80': ('http', '2a00:1450:4001:c01::67', 80),
- 'http://[2a00:1450:4001:c01::67]:80/test': ('http', '2a00:1450:4001:c01::67', 80),
+ '[2a00:1450:4001:c01::67]': ('http', '[2a00:1450:4001:c01::67]', None),
+ 'http://[2a00:1450:4001:c01::67]': ('http', '[2a00:1450:4001:c01::67]', None),
+ 'http://[2a00:1450:4001:c01::67]/test': ('http', '[2a00:1450:4001:c01::67]', None),
+ 'http://[2a00:1450:4001:c01::67]:80': ('http', '[2a00:1450:4001:c01::67]', 80),
+ 'http://[2a00:1450:4001:c01::67]:80/test': ('http', '[2a00:1450:4001:c01::67]', 80),
# More IPv6 from http://www.ietf.org/rfc/rfc2732.txt
- 'http://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:8000/index.html': ('http', 'FEDC:BA98:7654:3210:FEDC:BA98:7654:3210', 8000),
- 'http://[1080:0:0:0:8:800:200C:417A]/index.html': ('http', '1080:0:0:0:8:800:200C:417A', None),
- 'http://[3ffe:2a00:100:7031::1]': ('http', '3ffe:2a00:100:7031::1', None),
- 'http://[1080::8:800:200C:417A]/foo': ('http', '1080::8:800:200C:417A', None),
- 'http://[::192.9.5.5]/ipng': ('http', '::192.9.5.5', None),
- 'http://[::FFFF:129.144.52.38]:42/index.html': ('http', '::FFFF:129.144.52.38', 42),
- 'http://[2010:836B:4179::836B:4179]': ('http', '2010:836B:4179::836B:4179', None),
+ 'http://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:8000/index.html': ('http', '[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]', 8000),
+ 'http://[1080:0:0:0:8:800:200C:417A]/index.html': ('http', '[1080:0:0:0:8:800:200C:417A]', None),
+ 'http://[3ffe:2a00:100:7031::1]': ('http', '[3ffe:2a00:100:7031::1]', None),
+ 'http://[1080::8:800:200C:417A]/foo': ('http', '[1080::8:800:200C:417A]', None),
+ 'http://[::192.9.5.5]/ipng': ('http', '[::192.9.5.5]', None),
+ 'http://[::FFFF:129.144.52.38]:42/index.html': ('http', '[::FFFF:129.144.52.38]', 42),
+ 'http://[2010:836B:4179::836B:4179]': ('http', '[2010:836B:4179::836B:4179]', None),
}
for url, expected_host in url_host_map.items():
returned_host = get_host(url)
@@ -57,6 +70,8 @@ class TestUtil(unittest.TestCase):
# TODO: Add more tests
invalid_host = [
'http://google.com:foo',
+ 'http://::1/',
+ 'http://::1:80/',
]
for location in invalid_host:
@@ -83,6 +98,9 @@ class TestUtil(unittest.TestCase):
returned_url = parse_url(url)
self.assertEquals(returned_url, expected_url)
+ def test_parse_url_invalid_IPv6(self):
+ self.assertRaises(ValueError, parse_url, '[::1')
+
def test_request_uri(self):
url_host_map = {
'http://google.com/mail': '/mail',
@@ -99,6 +117,17 @@ class TestUtil(unittest.TestCase):
returned_url = parse_url(url)
self.assertEquals(returned_url.request_uri, expected_request_uri)
+ def test_netloc(self):
+ url_netloc_map = {
+ 'http://google.com/mail': 'google.com',
+ 'http://google.com:80/mail': 'google.com:80',
+ 'google.com/foobar': 'google.com',
+ 'google.com:12345': 'google.com:12345',
+ }
+
+ for url, expected_netloc in url_netloc_map.items():
+ self.assertEquals(parse_url(url).netloc, expected_netloc)
+
def test_make_headers(self):
self.assertEqual(
make_headers(accept_encoding=True),
@@ -148,3 +177,99 @@ class TestUtil(unittest.TestCase):
logger.debug('Testing add_stderr_logger')
logger.removeHandler(handler)
+
+ def _make_time_pass(self, seconds, timeout, time_mock):
+ """ Make some time pass for the timeout object """
+ time_mock.return_value = TIMEOUT_EPOCH
+ timeout.start_connect()
+ time_mock.return_value = TIMEOUT_EPOCH + seconds
+ return timeout
+
+ def test_invalid_timeouts(self):
+ try:
+ Timeout(total=-1)
+ self.fail("negative value should throw exception")
+ except ValueError as e:
+ self.assertTrue('less than' in str(e))
+ try:
+ Timeout(connect=2, total=-1)
+ self.fail("negative value should throw exception")
+ except ValueError as e:
+ self.assertTrue('less than' in str(e))
+
+ try:
+ Timeout(read=-1)
+ self.fail("negative value should throw exception")
+ except ValueError as e:
+ self.assertTrue('less than' in str(e))
+
+ # Booleans are allowed also by socket.settimeout and converted to the
+ # equivalent float (1.0 for True, 0.0 for False)
+ Timeout(connect=False, read=True)
+
+ try:
+ Timeout(read="foo")
+ self.fail("string value should not be allowed")
+ except ValueError as e:
+ self.assertTrue('int or float' in str(e))
+
+
+ @patch('urllib3.util.current_time')
+ def test_timeout(self, current_time):
+ timeout = Timeout(total=3)
+
+ # make 'no time' elapse
+ timeout = self._make_time_pass(seconds=0, timeout=timeout,
+ time_mock=current_time)
+ self.assertEqual(timeout.read_timeout, 3)
+ self.assertEqual(timeout.connect_timeout, 3)
+
+ timeout = Timeout(total=3, connect=2)
+ self.assertEqual(timeout.connect_timeout, 2)
+
+ timeout = Timeout()
+ self.assertEqual(timeout.connect_timeout, Timeout.DEFAULT_TIMEOUT)
+
+ # Connect takes 5 seconds, leaving 5 seconds for read
+ timeout = Timeout(total=10, read=7)
+ timeout = self._make_time_pass(seconds=5, timeout=timeout,
+ time_mock=current_time)
+ self.assertEqual(timeout.read_timeout, 5)
+
+ # Connect takes 2 seconds, read timeout still 7 seconds
+ timeout = Timeout(total=10, read=7)
+ timeout = self._make_time_pass(seconds=2, timeout=timeout,
+ time_mock=current_time)
+ self.assertEqual(timeout.read_timeout, 7)
+
+ timeout = Timeout(total=10, read=7)
+ self.assertEqual(timeout.read_timeout, 7)
+
+ timeout = Timeout(total=None, read=None, connect=None)
+ self.assertEqual(timeout.connect_timeout, None)
+ self.assertEqual(timeout.read_timeout, None)
+ self.assertEqual(timeout.total, None)
+
+
+ def test_timeout_str(self):
+ timeout = Timeout(connect=1, read=2, total=3)
+ self.assertEqual(str(timeout), "Timeout(connect=1, read=2, total=3)")
+ timeout = Timeout(connect=1, read=None, total=3)
+ self.assertEqual(str(timeout), "Timeout(connect=1, read=None, total=3)")
+
+
+ @patch('urllib3.util.current_time')
+ def test_timeout_elapsed(self, current_time):
+ current_time.return_value = TIMEOUT_EPOCH
+ timeout = Timeout(total=3)
+ self.assertRaises(TimeoutStateError, timeout.get_connect_duration)
+
+ timeout.start_connect()
+ self.assertRaises(TimeoutStateError, timeout.start_connect)
+
+ current_time.return_value = TIMEOUT_EPOCH + 2
+ self.assertEqual(timeout.get_connect_duration(), 2)
+ current_time.return_value = TIMEOUT_EPOCH + 37
+ self.assertEqual(timeout.get_connect_duration(), 37)
+
+
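In short, the bracket-preserving IPv6 parsing verified above behaves like
this: ::

    from urllib3.util import get_host, parse_url

    print(get_host('http://[2a00:1450:4001:c01::67]:80/test'))
    # ('http', '[2a00:1450:4001:c01::67]', 80)

    print(parse_url('http://[::1]:8080/').netloc)
    # [::1]:8080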
diff --git a/urllib3.egg-info/PKG-INFO b/urllib3.egg-info/PKG-INFO
index 661e33b..a81ab9c 100644
--- a/urllib3.egg-info/PKG-INFO
+++ b/urllib3.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
-Metadata-Version: 1.1
+Metadata-Version: 1.0
Name: urllib3
-Version: 1.6
+Version: 1.7.1
Summary: HTTP library with thread-safe connection pooling, file post, and more.
Home-page: http://urllib3.readthedocs.org/
Author: Andrey Petrov
@@ -25,7 +25,7 @@ Description: =======
- Supports gzip and deflate decoding.
- Thread-safe and sanity-safe.
- Works with AppEngine, gevent, and eventlib.
- - Tested on Python 2.6+ and Python 3.3+, 100% unit test coverage.
+ - Tested on Python 2.6+ and Python 3.2+, 100% unit test coverage.
- Small and easy to understand codebase perfect for extending and building upon.
For a more comprehensive solution, have a look at
`Requests <http://python-requests.org/>`_ which is also powered by urllib3.
@@ -89,14 +89,18 @@ Description: =======
Run the tests
=============
- We use some external dependencies to run the urllib3 test suite. Easiest way to
- run the tests is thusly from the urllib3 source root: ::
+ We use some external dependencies, multiple interpreters and code coverage
+ analysis while running test suite. Easiest way to run the tests is thusly the
+ ``tox`` utility: ::
- $ pip install -r test-requirements.txt
- $ nosetests
- .....................................................
+ $ tox
+ # [..]
+ py26: commands succeeded
+ py27: commands succeeded
+ py32: commands succeeded
+ py33: commands succeeded
- Success! You could also ``pip install coverage`` to get code coverage reporting.
+ Note that code coverage less than 100% is regarded as a failing run.
Contributing
@@ -117,6 +121,54 @@ Description: =======
Changes
=======
+ 1.7.1 (2013-09-25)
+ ++++++++++++++++++
+
+ * Added granular timeout support with new `urllib3.util.Timeout` class.
+ (Issue #231)
+
+ * Fixed Python 3.4 support. (Issue #238)
+
+
+ 1.7 (2013-08-14)
+ ++++++++++++++++
+
+ * More exceptions are now pickle-able, with tests. (Issue #174)
+
+ * Fixed redirecting with relative URLs in Location header. (Issue #178)
+
+ * Support for relative urls in ``Location: ...`` header. (Issue #179)
+
+ * ``urllib3.response.HTTPResponse`` now inherits from ``io.IOBase`` for bonus
+ file-like functionality. (Issue #187)
+
+ * Passing ``assert_hostname=False`` when creating a HTTPSConnectionPool will
+ skip hostname verification for SSL connections. (Issue #194)
+
+ * New method ``urllib3.response.HTTPResponse.stream(...)`` which acts as a
+ generator wrapped around ``.read(...)``. (Issue #198)
+
+ * IPv6 url parsing enforces brackets around the hostname. (Issue #199)
+
+ * Fixed thread race condition in
+ ``urllib3.poolmanager.PoolManager.connection_from_host(...)`` (Issue #204)
+
+ * ``ProxyManager`` requests now include non-default port in ``Host: ...``
+ header. (Issue #217)
+
+ * Added HTTPS proxy support in ``ProxyManager``. (Issue #170 #139)
+
+ * New ``RequestField`` object can be passed to the ``fields=...`` param which
+ can specify headers. (Issue #220)
+
+ * Raise ``urllib3.exceptions.ProxyError`` when connecting to proxy fails.
+ (Issue #221)
+
+ * Use international headers when posting file names. (Issue #119)
+
+ * Improved IPv6 support. (Issue #203)
+
+
1.6 (2013-04-25)
++++++++++++++++
diff --git a/urllib3.egg-info/SOURCES.txt b/urllib3.egg-info/SOURCES.txt
index 69ec475..32759d9 100644
--- a/urllib3.egg-info/SOURCES.txt
+++ b/urllib3.egg-info/SOURCES.txt
@@ -8,11 +8,15 @@ setup.py
test-requirements.txt
dummyserver/__init__.py
dummyserver/handlers.py
+dummyserver/proxy.py
dummyserver/server.py
dummyserver/testcase.py
+test/__init__.py
+test/benchmark.py
test/test_collections.py
test/test_connectionpool.py
test/test_exceptions.py
+test/test_fields.py
test/test_filepost.py
test/test_poolmanager.py
test/test_proxymanager.py
@@ -22,6 +26,7 @@ urllib3/__init__.py
urllib3/_collections.py
urllib3/connectionpool.py
urllib3/exceptions.py
+urllib3/fields.py
urllib3/filepost.py
urllib3/poolmanager.py
urllib3/request.py
diff --git a/urllib3/__init__.py b/urllib3/__init__.py
index ebd43b3..eed7006 100644
--- a/urllib3/__init__.py
+++ b/urllib3/__init__.py
@@ -10,7 +10,7 @@ urllib3 - Thread-safe connection pooling and re-using.
__author__ = 'Andrey Petrov (andrey.petrov@shazow.net)'
__license__ = 'MIT'
-__version__ = '1.6'
+__version__ = '1.7.1'
from .connectionpool import (
@@ -23,7 +23,7 @@ from . import exceptions
from .filepost import encode_multipart_formdata
from .poolmanager import PoolManager, ProxyManager, proxy_from_url
from .response import HTTPResponse
-from .util import make_headers, get_host
+from .util import make_headers, get_host, Timeout
# Set default logging handler to avoid "No handler found" warnings.
diff --git a/urllib3/_collections.py b/urllib3/_collections.py
index b35a736..282b8d5 100644
--- a/urllib3/_collections.py
+++ b/urllib3/_collections.py
@@ -5,7 +5,7 @@
# the MIT License: http://www.opensource.org/licenses/mit-license.php
from collections import MutableMapping
-from threading import Lock
+from threading import RLock
try: # Python 2.7+
from collections import OrderedDict
@@ -40,18 +40,18 @@ class RecentlyUsedContainer(MutableMapping):
self.dispose_func = dispose_func
self._container = self.ContainerCls()
- self._lock = Lock()
+ self.lock = RLock()
def __getitem__(self, key):
# Re-insert the item, moving it to the end of the eviction line.
- with self._lock:
+ with self.lock:
item = self._container.pop(key)
self._container[key] = item
return item
def __setitem__(self, key, value):
evicted_value = _Null
- with self._lock:
+ with self.lock:
# Possibly evict the existing value of 'key'
evicted_value = self._container.get(key, _Null)
self._container[key] = value
@@ -65,21 +65,21 @@ class RecentlyUsedContainer(MutableMapping):
self.dispose_func(evicted_value)
def __delitem__(self, key):
- with self._lock:
+ with self.lock:
value = self._container.pop(key)
if self.dispose_func:
self.dispose_func(value)
def __len__(self):
- with self._lock:
+ with self.lock:
return len(self._container)
def __iter__(self):
raise NotImplementedError('Iteration over this class is unlikely to be threadsafe.')
def clear(self):
- with self._lock:
+ with self.lock:
# Copy pointers to all values, then wipe the mapping
# under Python 2, this copies the list of values twice :-|
values = list(self._container.values())
@@ -90,5 +90,5 @@ class RecentlyUsedContainer(MutableMapping):
self.dispose_func(value)
def keys(self):
- with self._lock:
+ with self.lock:
return self._container.keys()
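
For context, the container whose lock is made reentrant above behaves like a
bounded LRU mapping with an optional disposal hook; a toy sketch: ::

    from urllib3._collections import RecentlyUsedContainer

    evicted = []
    lru = RecentlyUsedContainer(maxsize=2, dispose_func=evicted.append)
    lru['a'] = 1
    lru['b'] = 2
    lru['c'] = 3               # over maxsize; least-recently-used 'a' goes
    print(evicted)             # [1]
    print(sorted(lru.keys()))  # ['b', 'c']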
diff --git a/urllib3/connectionpool.py b/urllib3/connectionpool.py
index 73fa9ca..691d4e2 100644
--- a/urllib3/connectionpool.py
+++ b/urllib3/connectionpool.py
@@ -4,12 +4,11 @@
# This module is part of urllib3 and is released under
# the MIT License: http://www.opensource.org/licenses/mit-license.php
-import logging
-import socket
import errno
+import logging
from socket import error as SocketError, timeout as SocketTimeout
-from .util import resolve_cert_reqs, resolve_ssl_version, assert_fingerprint
+import socket
try: # Python 3
from http.client import HTTPConnection, HTTPException
@@ -22,11 +21,15 @@ try: # Python 3
from queue import LifoQueue, Empty, Full
except ImportError:
from Queue import LifoQueue, Empty, Full
+ import Queue as _ # Platform-specific: Windows
try: # Compiled with SSL?
HTTPSConnection = object
- BaseSSLError = None
+
+ class BaseSSLError(BaseException):
+ pass
+
ssl = None
try: # Python 3
@@ -41,21 +44,29 @@ except (ImportError, AttributeError): # Platform-specific: No SSL.
pass
-from .request import RequestMethods
-from .response import HTTPResponse
-from .util import get_host, is_connection_dropped, ssl_wrap_socket
from .exceptions import (
ClosedPoolError,
+ ConnectTimeoutError,
EmptyPoolError,
HostChangedError,
MaxRetryError,
SSLError,
- TimeoutError,
+ ReadTimeoutError,
+ ProxyError,
)
-
-from .packages.ssl_match_hostname import match_hostname, CertificateError
+from .packages.ssl_match_hostname import CertificateError, match_hostname
from .packages import six
-
+from .request import RequestMethods
+from .response import HTTPResponse
+from .util import (
+ assert_fingerprint,
+ get_host,
+ is_connection_dropped,
+ resolve_cert_reqs,
+ resolve_ssl_version,
+ ssl_wrap_socket,
+ Timeout,
+)
xrange = six.moves.xrange
@@ -93,11 +104,24 @@ class VerifiedHTTPSConnection(HTTPSConnection):
def connect(self):
# Add certificate verification
- sock = socket.create_connection((self.host, self.port), self.timeout)
+ try:
+ sock = socket.create_connection(
+ address=(self.host, self.port),
+ timeout=self.timeout)
+ except SocketTimeout:
+ raise ConnectTimeoutError(
+ self, "Connection to %s timed out. (connect timeout=%s)" %
+ (self.host, self.timeout))
resolved_cert_reqs = resolve_cert_reqs(self.cert_reqs)
resolved_ssl_version = resolve_ssl_version(self.ssl_version)
+ if self._tunnel_host:
+ self.sock = sock
+ # Calls self._set_hostport(), so self.host is
+ # self._tunnel_host below.
+ self._tunnel()
+
# Wrap socket using verification with the root certs in
# trusted_root_certs
self.sock = ssl_wrap_socket(sock, self.key_file, self.cert_file,
@@ -110,10 +134,11 @@ class VerifiedHTTPSConnection(HTTPSConnection):
if self.assert_fingerprint:
assert_fingerprint(self.sock.getpeercert(binary_form=True),
self.assert_fingerprint)
- else:
+ elif self.assert_hostname is not False:
match_hostname(self.sock.getpeercert(),
self.assert_hostname or self.host)
+
## Pool objects
class ConnectionPool(object):
@@ -126,6 +151,9 @@ class ConnectionPool(object):
QueueCls = LifoQueue
def __init__(self, host, port=None):
+ # httplib doesn't like it when we include brackets in ipv6 addresses
+ host = host.strip('[]')
+
self.host = host
self.port = port
@@ -133,6 +161,8 @@ class ConnectionPool(object):
return '%s(host=%r, port=%r)' % (type(self).__name__,
self.host, self.port)
+# This is taken from http://hg.python.org/cpython/file/7aaba721ebc0/Lib/socket.py#l252
+_blocking_errnos = set([errno.EAGAIN, errno.EWOULDBLOCK])
class HTTPConnectionPool(ConnectionPool, RequestMethods):
"""
@@ -151,9 +181,15 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
as a valid HTTP/1.0 or 1.1 status line, passed into
:class:`httplib.HTTPConnection`.
+ .. note::
+ Only works in Python 2. This parameter is ignored in Python 3.
+
:param timeout:
- Socket timeout for each individual connection, can be a float. None
- disables timeout.
+ Socket timeout in seconds for each individual connection. This can
+ be a float or integer, which sets the timeout for the HTTP request,
+ or an instance of :class:`urllib3.util.Timeout` which gives you more
+ fine-grained control over request timeouts. After the constructor has
+ been parsed, this is always a `urllib3.util.Timeout` object.
:param maxsize:
Number of connections to save that can be reused. More than 1 is useful
@@ -171,20 +207,39 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
:param headers:
Headers to include with all requests, unless other headers are given
explicitly.
+
+ :param _proxy:
+ Parsed proxy URL, should not be used directly, instead, see
+ :class:`urllib3.connectionpool.ProxyManager`"
+
+ :param _proxy_headers:
+ A dictionary with proxy headers, should not be used directly,
+ instead, see :class:`urllib3.connectionpool.ProxyManager`"
"""
scheme = 'http'
- def __init__(self, host, port=None, strict=False, timeout=None, maxsize=1,
- block=False, headers=None):
+ def __init__(self, host, port=None, strict=False,
+ timeout=Timeout.DEFAULT_TIMEOUT, maxsize=1, block=False,
+ headers=None, _proxy=None, _proxy_headers=None):
ConnectionPool.__init__(self, host, port)
RequestMethods.__init__(self, headers)
self.strict = strict
+
+ # This is for backwards compatibility and can be removed once a timeout
+ # can only be set to a Timeout object
+ if not isinstance(timeout, Timeout):
+ timeout = Timeout.from_float(timeout)
+
self.timeout = timeout
+
self.pool = self.QueueCls(maxsize)
self.block = block
+ self.proxy = _proxy
+ self.proxy_headers = _proxy_headers or {}
+
# Fill the queue up so that doing get() on it will block properly
for _ in xrange(maxsize):
self.pool.put(None)
@@ -200,9 +255,14 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
self.num_connections += 1
log.info("Starting new HTTP connection (%d): %s" %
(self.num_connections, self.host))
- return HTTPConnection(host=self.host,
- port=self.port,
- strict=self.strict)
+ extra_params = {}
+ if not six.PY3: # Python 2
+ extra_params['strict'] = self.strict
+
+ return HTTPConnection(host=self.host, port=self.port,
+ timeout=self.timeout.connect_timeout,
+ **extra_params)
+
def _get_conn(self, timeout=None):
"""
@@ -263,31 +323,89 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
% self.host)
# Connection never got put back into the pool, close it.
- conn.close()
+ if conn:
+ conn.close()
+
+ def _get_timeout(self, timeout):
+ """ Helper that always returns a :class:`urllib3.util.Timeout` """
+ if timeout is _Default:
+ return self.timeout.clone()
+
+ if isinstance(timeout, Timeout):
+ return timeout.clone()
+ else:
+ # User passed us an int/float. This is for backwards compatibility,
+ # can be removed later
+ return Timeout.from_float(timeout)
def _make_request(self, conn, method, url, timeout=_Default,
**httplib_request_kw):
"""
Perform a request on a given httplib connection object taken from our
pool.
+
+ :param conn:
+ a connection from one of our connection pools
+
+ :param timeout:
+ Socket timeout in seconds for the request. This can be a
+ float or integer, which will set the same timeout value for
+ the socket connect and the socket read, or an instance of
+ :class:`urllib3.util.Timeout`, which gives you more fine-grained
+ control over your timeouts.
"""
self.num_requests += 1
- if timeout is _Default:
- timeout = self.timeout
-
- conn.timeout = timeout # This only does anything in Py26+
- conn.request(method, url, **httplib_request_kw)
+ timeout_obj = self._get_timeout(timeout)
- # Set timeout
- sock = getattr(conn, 'sock', False) # AppEngine doesn't have sock attr.
- if sock:
- sock.settimeout(timeout)
+ try:
+ timeout_obj.start_connect()
+ conn.timeout = timeout_obj.connect_timeout
+ # conn.request() calls httplib.*.request, not the method in
+ # request.py. It also calls makefile (recv) on the socket
+ conn.request(method, url, **httplib_request_kw)
+ except SocketTimeout:
+ raise ConnectTimeoutError(
+ self, "Connection to %s timed out. (connect timeout=%s)" %
+ (self.host, timeout_obj.connect_timeout))
+
+ # Reset the timeout for the recv() on the socket
+ read_timeout = timeout_obj.read_timeout
+ log.debug("Setting read timeout to %s" % read_timeout)
+ # App Engine doesn't have a sock attr
+ if hasattr(conn, 'sock') and \
+ read_timeout is not None and \
+ read_timeout is not Timeout.DEFAULT_TIMEOUT:
+ # In Python 3 socket.py will catch EAGAIN and return None when you
+ # try and read into the file pointer created by http.client, which
+ # instead raises a BadStatusLine exception. Instead of catching
+ # the exception and assuming all BadStatusLine exceptions are read
+ # timeouts, check for a zero timeout before making the request.
+ if read_timeout == 0:
+ raise ReadTimeoutError(
+ self, url,
+ "Read timed out. (read timeout=%s)" % read_timeout)
+ conn.sock.settimeout(read_timeout)
+
+ # Receive the response from the server
+ try:
+ try: # Python 2.7+, use buffering of HTTP responses
+ httplib_response = conn.getresponse(buffering=True)
+ except TypeError: # Python 2.6 and older
+ httplib_response = conn.getresponse()
+ except SocketTimeout:
+ raise ReadTimeoutError(
+ self, url, "Read timed out. (read timeout=%s)" % read_timeout)
+
+ except SocketError as e: # Platform-specific: Python 2
+ # See the above comment about EAGAIN in Python 3. In Python 2 we
+ # have to specifically catch it and throw the timeout error
+ if e.errno in _blocking_errnos:
+ raise ReadTimeoutError(
+ self, url,
+ "Read timed out. (read timeout=%s)" % read_timeout)
+ raise
- try: # Python 2.7+, use buffering of HTTP responses
- httplib_response = conn.getresponse(buffering=True)
- except TypeError: # Python 2.6 and older
- httplib_response = conn.getresponse()
# AppEngine doesn't have a version attr.
http_version = getattr(conn, '_http_vsn_str', 'HTTP/?')
@@ -367,7 +485,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
:param redirect:
If True, automatically handle redirects (status codes 301, 302,
- 303, 307). Each redirect counts as a retry.
+ 303, 307, 308). Each redirect counts as a retry.
:param assert_same_host:
If ``True``, will make sure that the host of the pool requests is
@@ -375,7 +493,9 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
use the pool on an HTTP proxy and request foreign hosts.
:param timeout:
- If specified, overrides the default timeout for this one request.
+ If specified, overrides the default timeout for this one
+ request. It may be a float (in seconds) or an instance of
+ :class:`urllib3.util.Timeout`.
:param pool_timeout:
If set and the pool is set to block=True, then this method will
@@ -402,18 +522,11 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
if retries < 0:
raise MaxRetryError(self, url)
- if timeout is _Default:
- timeout = self.timeout
-
if release_conn is None:
release_conn = response_kw.get('preload_content', True)
# Check host
if assert_same_host and not self.is_same_host(url):
- host = "%s://%s" % (self.scheme, self.host)
- if self.port:
- host = "%s:%d" % (host, self.port)
-
raise HostChangedError(self, url, retries - 1)
conn = None
@@ -444,18 +557,20 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
# ``response.release_conn()`` is called (implicitly by
# ``response.read()``)
- except Empty as e:
+ except Empty:
# Timed out by queue
- raise TimeoutError(self, "Request timed out. (pool_timeout=%s)" %
- pool_timeout)
+ raise ReadTimeoutError(
+ self, url, "Read timed out, no pool connections are available.")
- except SocketTimeout as e:
+ except SocketTimeout:
# Timed out by socket
- raise TimeoutError(self, "Request timed out. (timeout=%s)" %
- timeout)
+ raise ReadTimeoutError(self, url, "Read timed out.")
except BaseSSLError as e:
# SSL certificate error
+ if 'timed out' in str(e) or \
+ 'did not complete (read)' in str(e): # Platform-specific: Python 2.6
+ raise ReadTimeoutError(self, url, "Read timed out.")
raise SSLError(e)
except CertificateError as e:
@@ -463,6 +578,10 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
raise SSLError(e)
except (HTTPException, SocketError) as e:
+ if isinstance(e, SocketError) and self.proxy is not None:
+ raise ProxyError('Cannot connect to proxy. '
+ 'Socket error: %s.' % e)
+
# Connection broken, discard. It will be replaced next _get_conn().
conn = None
# This is necessary so we can access e below
@@ -511,6 +630,7 @@ class HTTPSConnectionPool(HTTPConnectionPool):
:class:`.VerifiedHTTPSConnection` uses one of ``assert_fingerprint``,
``assert_hostname`` and ``host`` in this order to verify connections.
+ If ``assert_hostname`` is False, no verification is done.
The ``key_file``, ``cert_file``, ``cert_reqs``, ``ca_certs`` and
``ssl_version`` are only used if :mod:`ssl` is available and are fed into
@@ -523,13 +643,13 @@ class HTTPSConnectionPool(HTTPConnectionPool):
def __init__(self, host, port=None,
strict=False, timeout=None, maxsize=1,
block=False, headers=None,
+ _proxy=None, _proxy_headers=None,
key_file=None, cert_file=None, cert_reqs=None,
ca_certs=None, ssl_version=None,
assert_hostname=None, assert_fingerprint=None):
- HTTPConnectionPool.__init__(self, host, port,
- strict, timeout, maxsize,
- block, headers)
+ HTTPConnectionPool.__init__(self, host, port, strict, timeout, maxsize,
+ block, headers, _proxy, _proxy_headers)
self.key_file = key_file
self.cert_file = cert_file
self.cert_reqs = cert_reqs
@@ -538,6 +658,34 @@ class HTTPSConnectionPool(HTTPConnectionPool):
self.assert_hostname = assert_hostname
self.assert_fingerprint = assert_fingerprint
+ def _prepare_conn(self, connection):
+ """
+ Prepare the ``connection`` for :meth:`urllib3.util.ssl_wrap_socket`
+ and establish the tunnel if proxy is used.
+ """
+
+ if isinstance(connection, VerifiedHTTPSConnection):
+ connection.set_cert(key_file=self.key_file,
+ cert_file=self.cert_file,
+ cert_reqs=self.cert_reqs,
+ ca_certs=self.ca_certs,
+ assert_hostname=self.assert_hostname,
+ assert_fingerprint=self.assert_fingerprint)
+ connection.ssl_version = self.ssl_version
+
+ if self.proxy is not None:
+ # Python 2.7+
+ try:
+ set_tunnel = connection.set_tunnel
+ except AttributeError: # Platform-specific: Python 2.6
+ set_tunnel = connection._set_tunnel
+ set_tunnel(self.host, self.port, self.proxy_headers)
+ # Establish tunnel connection early, because otherwise httplib
+ # would improperly set Host: header to proxy's IP:port.
+ connection.connect()
+
+ return connection
+
def _new_conn(self):
"""
Return a fresh :class:`httplib.HTTPSConnection`.
@@ -546,26 +694,28 @@ class HTTPSConnectionPool(HTTPConnectionPool):
log.info("Starting new HTTPS connection (%d): %s"
% (self.num_connections, self.host))
+ actual_host = self.host
+ actual_port = self.port
+ if self.proxy is not None:
+ actual_host = self.proxy.host
+ actual_port = self.proxy.port
+
if not ssl: # Platform-specific: Python compiled without +ssl
if not HTTPSConnection or HTTPSConnection is object:
raise SSLError("Can't connect to HTTPS URL because the SSL "
"module is not available.")
-
- return HTTPSConnection(host=self.host,
- port=self.port,
- strict=self.strict)
-
- connection = VerifiedHTTPSConnection(host=self.host,
- port=self.port,
- strict=self.strict)
- connection.set_cert(key_file=self.key_file, cert_file=self.cert_file,
- cert_reqs=self.cert_reqs, ca_certs=self.ca_certs,
- assert_hostname=self.assert_hostname,
- assert_fingerprint=self.assert_fingerprint)
-
- connection.ssl_version = self.ssl_version
-
- return connection
+ connection_class = HTTPSConnection
+ else:
+ connection_class = VerifiedHTTPSConnection
+
+ extra_params = {}
+ if not six.PY3: # Python 2
+ extra_params['strict'] = self.strict
+ connection = connection_class(host=actual_host, port=actual_port,
+ timeout=self.timeout.connect_timeout,
+ **extra_params)
+
+ return self._prepare_conn(connection)
def connection_from_url(url, **kw):
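With these connectionpool changes, a pool's ``timeout`` may be a plain number
or the new :class:`urllib3.util.Timeout`, and failures surface as the new
``ConnectTimeoutError``/``ReadTimeoutError`` exceptions. A minimal sketch of
the new behaviour (host and values are illustrative only):

.. code-block:: python

    import urllib3
    from urllib3.util import Timeout
    from urllib3.exceptions import ConnectTimeoutError, ReadTimeoutError

    # Separate budgets for the connect and read phases of each request.
    timeout = Timeout(connect=2.0, read=7.0)
    pool = urllib3.HTTPConnectionPool('example.com', 80, timeout=timeout)

    try:
        r = pool.request('GET', '/')
    except ConnectTimeoutError:
        pass  # the TCP connect exceeded the 2.0s connect budget
    except ReadTimeoutError:
        pass  # a socket read exceeded the 7.0s read budget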
diff --git a/urllib3/contrib/ntlmpool.py b/urllib3/contrib/ntlmpool.py
index 277ee0b..b8cd933 100644
--- a/urllib3/contrib/ntlmpool.py
+++ b/urllib3/contrib/ntlmpool.py
@@ -33,7 +33,7 @@ class NTLMConnectionPool(HTTPSConnectionPool):
def __init__(self, user, pw, authurl, *args, **kwargs):
"""
authurl is a random URL on the server that is protected by NTLM.
- user is the Windows user, probably in the DOMAIN\username format.
+ user is the Windows user, probably in the DOMAIN\\username format.
pw is the password for the user.
"""
super(NTLMConnectionPool, self).__init__(*args, **kwargs)
diff --git a/urllib3/contrib/pyopenssl.py b/urllib3/contrib/pyopenssl.py
index 5c4c6d8..d43bcd6 100644
--- a/urllib3/contrib/pyopenssl.py
+++ b/urllib3/contrib/pyopenssl.py
@@ -20,13 +20,13 @@ Now you can use :mod:`urllib3` as you normally would, and it will support SNI
when the required modules are installed.
'''
-from ndg.httpsclient.ssl_peer_verification import (ServerSSLCertVerification,
- SUBJ_ALT_NAME_SUPPORT)
+from ndg.httpsclient.ssl_peer_verification import SUBJ_ALT_NAME_SUPPORT
from ndg.httpsclient.subj_alt_name import SubjectAltName
import OpenSSL.SSL
from pyasn1.codec.der import decoder as der_decoder
from socket import _fileobject
import ssl
+from cStringIO import StringIO
from .. import connectionpool
from .. import util
@@ -99,6 +99,172 @@ def get_subj_alt_name(peer_cert):
return dns_name
+class fileobject(_fileobject):
+
+ def read(self, size=-1):
+ # Use max, disallow tiny reads in a loop as they are very inefficient.
+ # We never leave read() with any leftover data from a new recv() call
+ # in our internal buffer.
+ rbufsize = max(self._rbufsize, self.default_bufsize)
+ # Our use of StringIO rather than lists of string objects returned by
+ # recv() minimizes memory usage and fragmentation that occurs when
+ # rbufsize is large compared to the typical return value of recv().
+ buf = self._rbuf
+ buf.seek(0, 2) # seek end
+ if size < 0:
+ # Read until EOF
+ self._rbuf = StringIO() # reset _rbuf. we consume it via buf.
+ while True:
+ try:
+ data = self._sock.recv(rbufsize)
+ except OpenSSL.SSL.WantReadError:
+ continue
+ if not data:
+ break
+ buf.write(data)
+ return buf.getvalue()
+ else:
+ # Read until size bytes or EOF seen, whichever comes first
+ buf_len = buf.tell()
+ if buf_len >= size:
+ # Already have size bytes in our buffer? Extract and return.
+ buf.seek(0)
+ rv = buf.read(size)
+ self._rbuf = StringIO()
+ self._rbuf.write(buf.read())
+ return rv
+
+ self._rbuf = StringIO() # reset _rbuf. we consume it via buf.
+ while True:
+ left = size - buf_len
+ # recv() will malloc the amount of memory given as its
+ # parameter even though it often returns much less data
+ # than that. The returned data string is short lived
+ # as we copy it into a StringIO and free it. This avoids
+ # fragmentation issues on many platforms.
+ try:
+ data = self._sock.recv(left)
+ except OpenSSL.SSL.WantReadError:
+ continue
+ if not data:
+ break
+ n = len(data)
+ if n == size and not buf_len:
+ # Shortcut. Avoid buffer data copies when:
+ # - We have no data in our buffer.
+ # AND
+ # - Our call to recv returned exactly the
+ # number of bytes we were asked to read.
+ return data
+ if n == left:
+ buf.write(data)
+ del data # explicit free
+ break
+ assert n <= left, "recv(%d) returned %d bytes" % (left, n)
+ buf.write(data)
+ buf_len += n
+ del data # explicit free
+ #assert buf_len == buf.tell()
+ return buf.getvalue()
+
+ def readline(self, size=-1):
+ buf = self._rbuf
+ buf.seek(0, 2) # seek end
+ if buf.tell() > 0:
+ # check if we already have it in our buffer
+ buf.seek(0)
+ bline = buf.readline(size)
+ if bline.endswith('\n') or len(bline) == size:
+ self._rbuf = StringIO()
+ self._rbuf.write(buf.read())
+ return bline
+ del bline
+ if size < 0:
+ # Read until \n or EOF, whichever comes first
+ if self._rbufsize <= 1:
+ # Speed up unbuffered case
+ buf.seek(0)
+ buffers = [buf.read()]
+ self._rbuf = StringIO() # reset _rbuf. we consume it via buf.
+ data = None
+ recv = self._sock.recv
+ while True:
+ try:
+ while data != "\n":
+ data = recv(1)
+ if not data:
+ break
+ buffers.append(data)
+ except OpenSSL.SSL.WantReadError:
+ continue
+ break
+ return "".join(buffers)
+
+ buf.seek(0, 2) # seek end
+ self._rbuf = StringIO() # reset _rbuf. we consume it via buf.
+ while True:
+ try:
+ data = self._sock.recv(self._rbufsize)
+ except OpenSSL.SSL.WantReadError:
+ continue
+ if not data:
+ break
+ nl = data.find('\n')
+ if nl >= 0:
+ nl += 1
+ buf.write(data[:nl])
+ self._rbuf.write(data[nl:])
+ del data
+ break
+ buf.write(data)
+ return buf.getvalue()
+ else:
+ # Read until size bytes or \n or EOF seen, whichever comes first
+ buf.seek(0, 2) # seek end
+ buf_len = buf.tell()
+ if buf_len >= size:
+ buf.seek(0)
+ rv = buf.read(size)
+ self._rbuf = StringIO()
+ self._rbuf.write(buf.read())
+ return rv
+ self._rbuf = StringIO() # reset _rbuf. we consume it via buf.
+ while True:
+ try:
+ data = self._sock.recv(self._rbufsize)
+ except OpenSSL.SSL.WantReadError:
+ continue
+ if not data:
+ break
+ left = size - buf_len
+ # did we just receive a newline?
+ nl = data.find('\n', 0, left)
+ if nl >= 0:
+ nl += 1
+ # save the excess data to _rbuf
+ self._rbuf.write(data[nl:])
+ if buf_len:
+ buf.write(data[:nl])
+ break
+ else:
+ # Shortcut. Avoid data copy through buf when returning
+ # a substring of our first recv().
+ return data[:nl]
+ n = len(data)
+ if n == size and not buf_len:
+ # Shortcut. Avoid data copy through buf when
+ # returning exactly all of our first recv().
+ return data
+ if n >= left:
+ buf.write(data[:left])
+ self._rbuf.write(data[left:])
+ break
+ buf.write(data)
+ buf_len += n
+ #assert buf_len == buf.tell()
+ return buf.getvalue()
+
+
class WrappedSocket(object):
'''API-compatibility wrapper for Python OpenSSL's Connection-class.'''
@@ -106,8 +272,11 @@ class WrappedSocket(object):
self.connection = connection
self.socket = socket
+ def fileno(self):
+ return self.socket.fileno()
+
def makefile(self, mode, bufsize=-1):
- return _fileobject(self.connection, mode, bufsize)
+ return fileobject(self.connection, mode, bufsize)
def settimeout(self, timeout):
return self.socket.settimeout(timeout)
@@ -115,10 +284,14 @@ class WrappedSocket(object):
def sendall(self, data):
return self.connection.sendall(data)
+ def close(self):
+ return self.connection.shutdown()
+
def getpeercert(self, binary_form=False):
x509 = self.connection.get_peer_certificate()
+
if not x509:
- raise ssl.SSLError('')
+ return x509
if binary_form:
return OpenSSL.crypto.dump_certificate(
@@ -159,9 +332,13 @@ def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None,
cnx = OpenSSL.SSL.Connection(ctx, sock)
cnx.set_tlsext_host_name(server_hostname)
cnx.set_connect_state()
- try:
- cnx.do_handshake()
- except OpenSSL.SSL.Error as e:
- raise ssl.SSLError('bad handshake', e)
+ while True:
+ try:
+ cnx.do_handshake()
+ except OpenSSL.SSL.WantReadError:
+ continue
+ except OpenSSL.SSL.Error as e:
+ raise ssl.SSLError('bad handshake', e)
+ break
return WrappedSocket(cnx, sock)
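The handshake loop above retries on ``WantReadError`` by spinning. On a
blocking-capable socket, an equivalent pattern is to wait in ``select()``
until the socket is readable before retrying; the helper below is a
hypothetical sketch, not part of this module:

.. code-block:: python

    import select
    import OpenSSL.SSL

    def do_handshake_waiting(cnx, sock):
        # Hypothetical variant of the retry loop above: sleep in select()
        # instead of spinning while PyOpenSSL waits for peer data.
        while True:
            try:
                cnx.do_handshake()
                return
            except OpenSSL.SSL.WantReadError:
                select.select([sock], [], [])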
diff --git a/urllib3/exceptions.py b/urllib3/exceptions.py
index 8dd76af..98ef9ab 100644
--- a/urllib3/exceptions.py
+++ b/urllib3/exceptions.py
@@ -20,7 +20,18 @@ class PoolError(HTTPError):
def __reduce__(self):
# For pickling purposes.
- return self.__class__, (None, self.url)
+ return self.__class__, (None, None)
+
+
+class RequestError(PoolError):
+ "Base exception for PoolErrors that have associated URLs."
+ def __init__(self, pool, url, message):
+ self.url = url
+ PoolError.__init__(self, pool, message)
+
+ def __reduce__(self):
+ # For pickling purposes.
+ return self.__class__, (None, self.url, None)
class SSLError(HTTPError):
@@ -28,6 +39,11 @@ class SSLError(HTTPError):
pass
+class ProxyError(HTTPError):
+ "Raised when the connection to a proxy fails."
+ pass
+
+
class DecodeError(HTTPError):
"Raised when automatic decoding based on Content-Type fails."
pass
@@ -35,7 +51,7 @@ class DecodeError(HTTPError):
## Leaf Exceptions
-class MaxRetryError(PoolError):
+class MaxRetryError(RequestError):
"Raised when the maximum number of retries is exceeded."
def __init__(self, pool, url, reason=None):
@@ -47,23 +63,41 @@ class MaxRetryError(PoolError):
else:
message += " (Caused by redirect)"
- PoolError.__init__(self, pool, message)
- self.url = url
+ RequestError.__init__(self, pool, url, message)
-class HostChangedError(PoolError):
+class HostChangedError(RequestError):
"Raised when an existing pool gets a request for a foreign host."
def __init__(self, pool, url, retries=3):
message = "Tried to open a foreign host with url: %s" % url
- PoolError.__init__(self, pool, message)
-
- self.url = url
+ RequestError.__init__(self, pool, url, message)
self.retries = retries
-class TimeoutError(PoolError):
- "Raised when a socket timeout occurs."
+class TimeoutStateError(HTTPError):
+ """ Raised when passing an invalid state to a timeout """
+ pass
+
+
+class TimeoutError(HTTPError):
+ """ Raised when a socket timeout error occurs.
+
+ Catching this error will catch both :exc:`ReadTimeoutErrors
+ <ReadTimeoutError>` and :exc:`ConnectTimeoutErrors <ConnectTimeoutError>`.
+ """
+ pass
+
+
+class ReadTimeoutError(TimeoutError, RequestError):
+ "Raised when a socket timeout occurs while receiving data from a server"
+ pass
+
+
+# This timeout error does not have a URL attached and needs to inherit from the
+# base HTTPError
+class ConnectTimeoutError(TimeoutError):
+ "Raised when a socket timeout occurs while connecting to a server"
pass
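With this hierarchy, one ``except TimeoutError`` clause now covers both the
connect and read flavours, and ``RequestError`` keeps subclasses pickleable
with their URL intact. A short sketch of both properties:

.. code-block:: python

    import pickle
    from urllib3.exceptions import (ConnectTimeoutError, ReadTimeoutError,
                                    TimeoutError)

    try:
        raise ConnectTimeoutError(None, "connect timed out")
    except TimeoutError:
        pass  # also catches ReadTimeoutError

    # RequestError.__reduce__ preserves the URL across pickling.
    err = pickle.loads(pickle.dumps(ReadTimeoutError(None, '/path', 'msg')))
    assert err.url == '/path'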
diff --git a/urllib3/fields.py b/urllib3/fields.py
new file mode 100644
index 0000000..ed01765
--- /dev/null
+++ b/urllib3/fields.py
@@ -0,0 +1,177 @@
+# urllib3/fields.py
+# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
+#
+# This module is part of urllib3 and is released under
+# the MIT License: http://www.opensource.org/licenses/mit-license.php
+
+import email.utils
+import mimetypes
+
+from .packages import six
+
+
+def guess_content_type(filename, default='application/octet-stream'):
+ """
+ Guess the "Content-Type" of a file.
+
+ :param filename:
+        The filename to guess the "Content-Type" of using :mod:`mimetypes`.
+ :param default:
+ If no "Content-Type" can be guessed, default to `default`.
+ """
+ if filename:
+ return mimetypes.guess_type(filename)[0] or default
+ return default
+
+
+def format_header_param(name, value):
+ """
+ Helper function to format and quote a single header parameter.
+
+ Particularly useful for header parameters which might contain
+ non-ASCII values, like file names. This follows RFC 2231, as
+ suggested by RFC 2388 Section 4.4.
+
+ :param name:
+ The name of the parameter, a string expected to be ASCII only.
+ :param value:
+ The value of the parameter, provided as a unicode string.
+ """
+ if not any(ch in value for ch in '"\\\r\n'):
+ result = '%s="%s"' % (name, value)
+ try:
+ result.encode('ascii')
+ except UnicodeEncodeError:
+ pass
+ else:
+ return result
+ if not six.PY3: # Python 2:
+ value = value.encode('utf-8')
+ value = email.utils.encode_rfc2231(value, 'utf-8')
+ value = '%s*=%s' % (name, value)
+ return value
+
+
+class RequestField(object):
+ """
+ A data container for request body parameters.
+
+ :param name:
+ The name of this request field.
+ :param data:
+ The data/value body.
+ :param filename:
+ An optional filename of the request field.
+ :param headers:
+ An optional dict-like object of headers to initially use for the field.
+ """
+ def __init__(self, name, data, filename=None, headers=None):
+ self._name = name
+ self._filename = filename
+ self.data = data
+ self.headers = {}
+ if headers:
+ self.headers = dict(headers)
+
+ @classmethod
+ def from_tuples(cls, fieldname, value):
+ """
+ A :class:`~urllib3.fields.RequestField` factory from old-style tuple parameters.
+
+    Supports constructing :class:`~urllib3.fields.RequestField` from
+    key/value string pairs as well as key/filetuple pairs. A filetuple is a
+    (filename, data, MIME type) tuple where the MIME type is optional. For example: ::
+
+ 'foo': 'bar',
+ 'fakefile': ('foofile.txt', 'contents of foofile'),
+ 'realfile': ('barfile.txt', open('realfile').read()),
+ 'typedfile': ('bazfile.bin', open('bazfile').read(), 'image/jpeg'),
+ 'nonamefile': 'contents of nonamefile field',
+
+ Field names and filenames must be unicode.
+ """
+ if isinstance(value, tuple):
+ if len(value) == 3:
+ filename, data, content_type = value
+ else:
+ filename, data = value
+ content_type = guess_content_type(filename)
+ else:
+ filename = None
+ content_type = None
+ data = value
+
+ request_param = cls(fieldname, data, filename=filename)
+ request_param.make_multipart(content_type=content_type)
+
+ return request_param
+
+ def _render_part(self, name, value):
+ """
+ Overridable helper function to format a single header parameter.
+
+ :param name:
+ The name of the parameter, a string expected to be ASCII only.
+ :param value:
+ The value of the parameter, provided as a unicode string.
+ """
+ return format_header_param(name, value)
+
+ def _render_parts(self, header_parts):
+ """
+ Helper function to format and quote a single header.
+
+ Useful for single headers that are composed of multiple items. E.g.,
+ 'Content-Disposition' fields.
+
+ :param header_parts:
+        A sequence of (k, v) tuples or a :class:`dict` of (k, v) to format as
+ `k1="v1"; k2="v2"; ...`.
+ """
+ parts = []
+ iterable = header_parts
+ if isinstance(header_parts, dict):
+ iterable = header_parts.items()
+
+ for name, value in iterable:
+ if value:
+ parts.append(self._render_part(name, value))
+
+ return '; '.join(parts)
+
+ def render_headers(self):
+ """
+ Renders the headers for this request field.
+ """
+ lines = []
+
+ sort_keys = ['Content-Disposition', 'Content-Type', 'Content-Location']
+ for sort_key in sort_keys:
+ if self.headers.get(sort_key, False):
+ lines.append('%s: %s' % (sort_key, self.headers[sort_key]))
+
+ for header_name, header_value in self.headers.items():
+ if header_name not in sort_keys:
+ if header_value:
+ lines.append('%s: %s' % (header_name, header_value))
+
+ lines.append('\r\n')
+ return '\r\n'.join(lines)
+
+ def make_multipart(self, content_disposition=None, content_type=None, content_location=None):
+ """
+ Makes this request field into a multipart request field.
+
+      This method overwrites the "Content-Disposition", "Content-Type" and
+      "Content-Location" headers of the request field.
+
+ :param content_type:
+ The 'Content-Type' of the request body.
+ :param content_location:
+ The 'Content-Location' of the request body.
+
+ """
+ self.headers['Content-Disposition'] = content_disposition or 'form-data'
+ self.headers['Content-Disposition'] += '; '.join(['', self._render_parts((('name', self._name), ('filename', self._filename)))])
+ self.headers['Content-Type'] = content_type
+ self.headers['Content-Location'] = content_location
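The new ``RequestField`` API can be exercised directly; a small sketch using
made-up field names and contents:

.. code-block:: python

    from urllib3.fields import RequestField

    field = RequestField('userfile', 'file contents', filename='report.txt')
    field.make_multipart(content_type='text/plain')
    print(field.render_headers())
    # Content-Disposition: form-data; name="userfile"; filename="report.txt"
    # Content-Type: text/plain

    # Old-style tuples still work through the factory classmethod:
    field2 = RequestField.from_tuples('fakefile',
                                      ('foofile.txt', 'contents of foofile'))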
diff --git a/urllib3/filepost.py b/urllib3/filepost.py
index 526a740..4575582 100644
--- a/urllib3/filepost.py
+++ b/urllib3/filepost.py
@@ -12,6 +12,7 @@ from io import BytesIO
from .packages import six
from .packages.six import b
+from .fields import RequestField
writer = codecs.lookup('utf-8')[3]
@@ -23,15 +24,38 @@ def choose_boundary():
return uuid4().hex
-def get_content_type(filename):
- return mimetypes.guess_type(filename)[0] or 'application/octet-stream'
+def iter_field_objects(fields):
+ """
+ Iterate over fields.
+
+ Supports list of (k, v) tuples and dicts, and lists of
+ :class:`~urllib3.fields.RequestField`.
+
+ """
+ if isinstance(fields, dict):
+ i = six.iteritems(fields)
+ else:
+ i = iter(fields)
+
+ for field in i:
+ if isinstance(field, RequestField):
+ yield field
+ else:
+ yield RequestField.from_tuples(*field)
def iter_fields(fields):
"""
Iterate over fields.
+    .. deprecated::
+
+       The addition of :class:`~urllib3.fields.RequestField` makes this
+       function obsolete. Use :func:`iter_field_objects` instead, which
+       returns :class:`~urllib3.fields.RequestField` objects.
+
Supports list of (k, v) tuples and dicts.
+
"""
if isinstance(fields, dict):
return ((k, v) for k, v in six.iteritems(fields))
@@ -44,15 +68,7 @@ def encode_multipart_formdata(fields, boundary=None):
Encode a dictionary of ``fields`` using the multipart/form-data MIME format.
:param fields:
- Dictionary of fields or list of (key, value) or (key, value, MIME type)
- field tuples. The key is treated as the field name, and the value as
- the body of the form-data bytes. If the value is a tuple of two
- elements, then the first element is treated as the filename of the
- form-data section and a suitable MIME type is guessed based on the
- filename. If the value is a tuple of three elements, then the third
- element is treated as an explicit MIME type of the form-data section.
-
- Field names and filenames must be unicode.
+        Dictionary of fields, list of (key, value) tuples, or list of
+        :class:`~urllib3.fields.RequestField` objects.
:param boundary:
If not specified, then a random boundary will be generated using
@@ -62,24 +78,11 @@ def encode_multipart_formdata(fields, boundary=None):
if boundary is None:
boundary = choose_boundary()
- for fieldname, value in iter_fields(fields):
+ for field in iter_field_objects(fields):
body.write(b('--%s\r\n' % (boundary)))
- if isinstance(value, tuple):
- if len(value) == 3:
- filename, data, content_type = value
- else:
- filename, data = value
- content_type = get_content_type(filename)
- writer(body).write('Content-Disposition: form-data; name="%s"; '
- 'filename="%s"\r\n' % (fieldname, filename))
- body.write(b('Content-Type: %s\r\n\r\n' %
- (content_type,)))
- else:
- data = value
- writer(body).write('Content-Disposition: form-data; name="%s"\r\n'
- % (fieldname))
- body.write(b'\r\n')
+ writer(body).write(field.render_headers())
+ data = field.data
if isinstance(data, int):
data = str(data) # Backwards compatibility
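The rewritten encoder accepts dicts, old-style tuples, and ``RequestField``
objects interchangeably. A sketch, assuming the function's established
``(body, content_type)`` return pair, which this diff does not change:

.. code-block:: python

    from urllib3.filepost import encode_multipart_formdata
    from urllib3.fields import RequestField

    # An old-style dict of field tuples still works...
    body, content_type = encode_multipart_formdata({
        'foo': 'bar',
        'fakefile': ('foofile.txt', 'contents of foofile'),
    })

    # ...as does a list of pre-built RequestField objects.
    field = RequestField('typedfile', 'data', filename='bazfile.bin')
    field.make_multipart(content_type='image/jpeg')
    body, content_type = encode_multipart_formdata([field])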
diff --git a/urllib3/packages/ssl_match_hostname/__init__.py b/urllib3/packages/ssl_match_hostname/__init__.py
index 9560b04..2d61ac2 100644
--- a/urllib3/packages/ssl_match_hostname/__init__.py
+++ b/urllib3/packages/ssl_match_hostname/__init__.py
@@ -7,23 +7,60 @@ __version__ = '3.2.2'
class CertificateError(ValueError):
pass
-def _dnsname_to_pat(dn):
+def _dnsname_match(dn, hostname, max_wildcards=1):
+ """Matching according to RFC 6125, section 6.4.3
+
+ http://tools.ietf.org/html/rfc6125#section-6.4.3
+ """
pats = []
- for frag in dn.split(r'.'):
- if frag == '*':
- # When '*' is a fragment by itself, it matches a non-empty dotless
- # fragment.
- pats.append('[^.]+')
- else:
- # Otherwise, '*' matches any dotless fragment.
- frag = re.escape(frag)
- pats.append(frag.replace(r'\*', '[^.]*'))
- return re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE)
+ if not dn:
+ return False
+
+ parts = dn.split(r'.')
+ leftmost = parts[0]
+
+ wildcards = leftmost.count('*')
+ if wildcards > max_wildcards:
+ # Issue #17980: avoid denials of service by refusing more
+        # than one wildcard per fragment. A survey of established
+ # policy among SSL implementations showed it to be a
+ # reasonable choice.
+ raise CertificateError(
+ "too many wildcards in certificate DNS name: " + repr(dn))
+
+ # speed up common case w/o wildcards
+ if not wildcards:
+ return dn.lower() == hostname.lower()
+
+ # RFC 6125, section 6.4.3, subitem 1.
+ # The client SHOULD NOT attempt to match a presented identifier in which
+ # the wildcard character comprises a label other than the left-most label.
+ if leftmost == '*':
+ # When '*' is a fragment by itself, it matches a non-empty dotless
+ # fragment.
+ pats.append('[^.]+')
+ elif leftmost.startswith('xn--') or hostname.startswith('xn--'):
+ # RFC 6125, section 6.4.3, subitem 3.
+ # The client SHOULD NOT attempt to match a presented identifier
+ # where the wildcard character is embedded within an A-label or
+ # U-label of an internationalized domain name.
+ pats.append(re.escape(leftmost))
+ else:
+ # Otherwise, '*' matches any dotless string, e.g. www*
+ pats.append(re.escape(leftmost).replace(r'\*', '[^.]*'))
+
+ # add the remaining fragments, ignore any wildcards
+ for frag in parts[1:]:
+ pats.append(re.escape(frag))
+
+ pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE)
+ return pat.match(hostname)
+
def match_hostname(cert, hostname):
"""Verify that *cert* (in decoded format as returned by
- SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 rules
- are mostly followed, but IP addresses are not accepted for *hostname*.
+ SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 and RFC 6125
+ rules are followed, but IP addresses are not accepted for *hostname*.
CertificateError is raised on failure. On success, the function
returns nothing.
@@ -34,7 +71,7 @@ def match_hostname(cert, hostname):
san = cert.get('subjectAltName', ())
for key, value in san:
if key == 'DNS':
- if _dnsname_to_pat(value).match(hostname):
+ if _dnsname_match(value, hostname):
return
dnsnames.append(value)
if not dnsnames:
@@ -45,7 +82,7 @@ def match_hostname(cert, hostname):
# XXX according to RFC 2818, the most specific Common Name
# must be used.
if key == 'commonName':
- if _dnsname_to_pat(value).match(hostname):
+ if _dnsname_match(value, hostname):
return
dnsnames.append(value)
if len(dnsnames) > 1:
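The stricter matcher can be exercised on its own; a sketch with a made-up
certificate dictionary:

.. code-block:: python

    from urllib3.packages.ssl_match_hostname import (CertificateError,
                                                     match_hostname)

    cert = {'subjectAltName': (('DNS', '*.example.com'),)}
    match_hostname(cert, 'www.example.com')  # returns None on success

    try:
        match_hostname(cert, 'sub.www.example.com')
    except CertificateError:
        pass  # per RFC 6125, the wildcard covers only one label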
diff --git a/urllib3/poolmanager.py b/urllib3/poolmanager.py
index ce0c248..e7f8667 100644
--- a/urllib3/poolmanager.py
+++ b/urllib3/poolmanager.py
@@ -6,9 +6,14 @@
import logging
+try: # Python 3
+ from urllib.parse import urljoin
+except ImportError:
+ from urlparse import urljoin
+
from ._collections import RecentlyUsedContainer
from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool
-from .connectionpool import connection_from_url, port_by_scheme
+from .connectionpool import port_by_scheme
from .request import RequestMethods
from .util import parse_url
@@ -55,6 +60,8 @@ class PoolManager(RequestMethods):
"""
+ proxy = None
+
def __init__(self, num_pools=10, headers=None, **connection_pool_kw):
RequestMethods.__init__(self, headers)
self.connection_pool_kw = connection_pool_kw
@@ -94,20 +101,23 @@ class PoolManager(RequestMethods):
If ``port`` isn't given, it will be derived from the ``scheme`` using
``urllib3.connectionpool.port_by_scheme``.
"""
+
scheme = scheme or 'http'
+
port = port or port_by_scheme.get(scheme, 80)
pool_key = (scheme, host, port)
- # If the scheme, host, or port doesn't match existing open connections,
- # open a new ConnectionPool.
- pool = self.pools.get(pool_key)
- if pool:
- return pool
+ with self.pools.lock:
+ # If the scheme, host, or port doesn't match existing open
+ # connections, open a new ConnectionPool.
+ pool = self.pools.get(pool_key)
+ if pool:
+ return pool
- # Make a fresh ConnectionPool of the desired type
- pool = self._new_pool(scheme, host, port)
- self.pools[pool_key] = pool
+ # Make a fresh ConnectionPool of the desired type
+ pool = self._new_pool(scheme, host, port)
+ self.pools[pool_key] = pool
return pool
def connection_from_url(self, url):
@@ -139,12 +149,19 @@ class PoolManager(RequestMethods):
if 'headers' not in kw:
kw['headers'] = self.headers
- response = conn.urlopen(method, u.request_uri, **kw)
+ if self.proxy is not None and u.scheme == "http":
+ response = conn.urlopen(method, url, **kw)
+ else:
+ response = conn.urlopen(method, u.request_uri, **kw)
redirect_location = redirect and response.get_redirect_location()
if not redirect_location:
return response
+ # Support relative URLs for redirecting.
+ redirect_location = urljoin(url, redirect_location)
+
+ # RFC 2616, Section 10.3.4
if response.status == 303:
method = 'GET'
@@ -154,15 +171,59 @@ class PoolManager(RequestMethods):
return self.urlopen(method, redirect_location, **kw)
-class ProxyManager(RequestMethods):
+class ProxyManager(PoolManager):
"""
- Given a ConnectionPool to a proxy, the ProxyManager's ``urlopen`` method
- will make requests to any url through the defined proxy. The ProxyManager
- class will automatically set the 'Host' header if it is not provided.
+ Behaves just like :class:`PoolManager`, but sends all requests through
+ the defined proxy, using the CONNECT method for HTTPS URLs.
+
+    :param proxy_url:
+ The URL of the proxy to be used.
+
+ :param proxy_headers:
+        A dictionary containing headers that will be sent to the proxy. For
+        HTTP proxies they are sent with each request, while for HTTPS/CONNECT
+        they are sent only once, when the tunnel is established. Can be used
+        for proxy authentication.
+
+ Example:
+ >>> proxy = urllib3.ProxyManager('http://localhost:3128/')
+ >>> r1 = proxy.request('GET', 'http://google.com/')
+ >>> r2 = proxy.request('GET', 'http://httpbin.org/')
+ >>> len(proxy.pools)
+ 1
+ >>> r3 = proxy.request('GET', 'https://httpbin.org/')
+ >>> r4 = proxy.request('GET', 'https://twitter.com/')
+ >>> len(proxy.pools)
+ 3
+
"""
- def __init__(self, proxy_pool):
- self.proxy_pool = proxy_pool
+ def __init__(self, proxy_url, num_pools=10, headers=None,
+ proxy_headers=None, **connection_pool_kw):
+
+ if isinstance(proxy_url, HTTPConnectionPool):
+ proxy_url = '%s://%s:%i' % (proxy_url.scheme, proxy_url.host,
+ proxy_url.port)
+ proxy = parse_url(proxy_url)
+ if not proxy.port:
+ port = port_by_scheme.get(proxy.scheme, 80)
+ proxy = proxy._replace(port=port)
+ self.proxy = proxy
+ self.proxy_headers = proxy_headers or {}
+ assert self.proxy.scheme in ("http", "https"), \
+ 'Not supported proxy scheme %s' % self.proxy.scheme
+ connection_pool_kw['_proxy'] = self.proxy
+ connection_pool_kw['_proxy_headers'] = self.proxy_headers
+ super(ProxyManager, self).__init__(
+ num_pools, headers, **connection_pool_kw)
+
+ def connection_from_host(self, host, port=None, scheme='http'):
+ if scheme == "https":
+ return super(ProxyManager, self).connection_from_host(
+ host, port, scheme)
+
+ return super(ProxyManager, self).connection_from_host(
+ self.proxy.host, self.proxy.port, self.proxy.scheme)
def _set_proxy_headers(self, url, headers=None):
"""
@@ -171,22 +232,28 @@ class ProxyManager(RequestMethods):
"""
headers_ = {'Accept': '*/*'}
- host = parse_url(url).host
- if host:
- headers_['Host'] = host
+ netloc = parse_url(url).netloc
+ if netloc:
+ headers_['Host'] = netloc
if headers:
headers_.update(headers)
-
return headers_
- def urlopen(self, method, url, **kw):
+ def urlopen(self, method, url, redirect=True, **kw):
"Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute."
- kw['assert_same_host'] = False
- kw['headers'] = self._set_proxy_headers(url, headers=kw.get('headers'))
- return self.proxy_pool.urlopen(method, url, **kw)
+ u = parse_url(url)
+
+ if u.scheme == "http":
+            # It's too late to set proxy headers on a per-request basis for
+            # tunnelled HTTPS connections; use the
+            # constructor's proxy_headers instead.
+ kw['headers'] = self._set_proxy_headers(url, kw.get('headers',
+ self.headers))
+ kw['headers'].update(self.proxy_headers)
+
+ return super(ProxyManager, self).urlopen(method, url, redirect, **kw)
-def proxy_from_url(url, **pool_kw):
- proxy_pool = connection_from_url(url, **pool_kw)
- return ProxyManager(proxy_pool)
+def proxy_from_url(url, **kw):
+ return ProxyManager(proxy_url=url, **kw)
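Beyond the doctest above, the new ``proxy_headers`` parameter is the hook for
proxy authentication; a sketch where the proxy URL and the credential value
are placeholders:

.. code-block:: python

    from urllib3.poolmanager import proxy_from_url

    # 'Basic dXNlcjpwYXNzd2Q=' is base64 of the placeholder 'user:passwd'.
    proxy = proxy_from_url(
        'http://localhost:3128/',
        proxy_headers={'Proxy-Authorization': 'Basic dXNlcjpwYXNzd2Q='})

    # Sent with each request for HTTP, once per CONNECT tunnel for HTTPS.
    r = proxy.request('GET', 'http://example.com/')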
diff --git a/urllib3/request.py b/urllib3/request.py
index bf0256e..66a9a0e 100644
--- a/urllib3/request.py
+++ b/urllib3/request.py
@@ -30,7 +30,7 @@ class RequestMethods(object):
in the URL (such as GET, HEAD, DELETE).
:meth:`.request_encode_body` is for sending requests whose fields are
- encoded in the *body* of the request using multipart or www-orm-urlencoded
+ encoded in the *body* of the request using multipart or www-form-urlencoded
(such as for POST, PUT, PATCH).
:meth:`.request` is for making any kind of request, it will look up the
diff --git a/urllib3/response.py b/urllib3/response.py
index 1685760..4efff5a 100644
--- a/urllib3/response.py
+++ b/urllib3/response.py
@@ -7,9 +7,11 @@
import logging
import zlib
+import io
from .exceptions import DecodeError
from .packages.six import string_types as basestring, binary_type
+from .util import is_fp_closed
log = logging.getLogger(__name__)
@@ -48,7 +50,7 @@ def _get_decoder(mode):
return DeflateDecoder()
-class HTTPResponse(object):
+class HTTPResponse(io.IOBase):
"""
HTTP Response container.
@@ -72,6 +74,7 @@ class HTTPResponse(object):
"""
CONTENT_DECODERS = ['gzip', 'deflate']
+ REDIRECT_STATUSES = [301, 302, 303, 307, 308]
def __init__(self, body='', headers=None, status=0, version=0, reason=None,
strict=0, preload_content=True, decode_content=True,
@@ -105,7 +108,7 @@ class HTTPResponse(object):
code and valid location. ``None`` if redirect status and no
location. ``False`` if not a redirect status code.
"""
- if self.status in [301, 302, 303, 307]:
+ if self.status in self.REDIRECT_STATUSES:
return self.headers.get('location')
return False
@@ -183,11 +186,13 @@ class HTTPResponse(object):
try:
if decode_content and self._decoder:
data = self._decoder.decompress(data)
- except (IOError, zlib.error):
- raise DecodeError("Received response with content-encoding: %s, but "
- "failed to decode it." % content_encoding)
+ except (IOError, zlib.error) as e:
+ raise DecodeError(
+ "Received response with content-encoding: %s, but "
+ "failed to decode it." % content_encoding,
+ e)
- if flush_decoder and self._decoder:
+ if flush_decoder and decode_content and self._decoder:
buf = self._decoder.decompress(binary_type())
data += buf + self._decoder.flush()
@@ -200,6 +205,29 @@ class HTTPResponse(object):
if self._original_response and self._original_response.isclosed():
self.release_conn()
+ def stream(self, amt=2**16, decode_content=None):
+ """
+ A generator wrapper for the read() method. A call will block until
+ ``amt`` bytes have been read from the connection or until the
+ connection is closed.
+
+ :param amt:
+ How much of the content to read. The generator will return up to
+            this much data per iteration, but may return less. This is particularly
+ likely when using compressed data. However, the empty string will
+ never be returned.
+
+ :param decode_content:
+ If True, will attempt to decode the body based on the
+ 'content-encoding' header.
+ """
+ while not is_fp_closed(self._fp):
+ data = self.read(amt=amt, decode_content=decode_content)
+
+ if data:
+ yield data
+
+
@classmethod
def from_httplib(ResponseCls, r, **response_kw):
"""
@@ -239,3 +267,35 @@ class HTTPResponse(object):
def getheader(self, name, default=None):
return self.headers.get(name, default)
+
+ # Overrides from io.IOBase
+ def close(self):
+ if not self.closed:
+ self._fp.close()
+
+ @property
+ def closed(self):
+ if self._fp is None:
+ return True
+ elif hasattr(self._fp, 'closed'):
+ return self._fp.closed
+ elif hasattr(self._fp, 'isclosed'): # Python 2
+ return self._fp.isclosed()
+ else:
+ return True
+
+ def fileno(self):
+ if self._fp is None:
+ raise IOError("HTTPResponse has no file to get a fileno from")
+ elif hasattr(self._fp, "fileno"):
+ return self._fp.fileno()
+ else:
+ raise IOError("The file-like object this HTTPResponse is wrapped "
+ "around has no file descriptor")
+
+ def flush(self):
+ if self._fp is not None and hasattr(self._fp, 'flush'):
+ return self._fp.flush()
+
+ def readable(self):
+ return True
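The new ``stream()`` generator only makes sense on a response whose body has
not been preloaded; a sketch (URL and chunk size are illustrative):

.. code-block:: python

    import urllib3

    http = urllib3.PoolManager()
    r = http.request('GET', 'http://example.com/', preload_content=False)

    for chunk in r.stream(2**10):
        # Yields up to 1024 bytes per iteration, never an empty string.
        print(len(chunk))

    r.release_conn()  # hand the socket back to the pool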
diff --git a/urllib3/util.py b/urllib3/util.py
index 544f9ed..266c9ed 100644
--- a/urllib3/util.py
+++ b/urllib3/util.py
@@ -6,10 +6,11 @@
from base64 import b64encode
+from binascii import hexlify, unhexlify
from collections import namedtuple
-from socket import error as SocketError
from hashlib import md5, sha1
-from binascii import hexlify, unhexlify
+from socket import error as SocketError, _GLOBAL_DEFAULT_TIMEOUT
+import time
try:
from select import poll, POLLIN
@@ -31,9 +32,234 @@ try: # Test for SSL features
except ImportError:
pass
-
from .packages import six
-from .exceptions import LocationParseError, SSLError
+from .exceptions import LocationParseError, SSLError, TimeoutStateError
+
+
+_Default = object()
+# The default timeout to use for socket connections. This is the attribute used
+# by httplib to define the default timeout
+
+
+def current_time():
+ """
+    Retrieve the current time; this function is mocked out in unit testing.
+ """
+ return time.time()
+
+
+class Timeout(object):
+ """
+ Utility object for storing timeout values.
+
+ Example usage:
+
+ .. code-block:: python
+
+ timeout = urllib3.util.Timeout(connect=2.0, read=7.0)
+ pool = HTTPConnectionPool('www.google.com', 80, timeout=timeout)
+ pool.request(...) # Etc, etc
+
+ :param connect:
+ The maximum amount of time to wait for a connection attempt to a server
+ to succeed. Omitting the parameter will default the connect timeout to
+ the system default, probably `the global default timeout in socket.py
+ <http://hg.python.org/cpython/file/603b4d593758/Lib/socket.py#l535>`_.
+ None will set an infinite timeout for connection attempts.
+
+ :type connect: integer, float, or None
+
+ :param read:
+ The maximum amount of time to wait between consecutive
+ read operations for a response from the server. Omitting
+ the parameter will default the read timeout to the system
+ default, probably `the global default timeout in socket.py
+ <http://hg.python.org/cpython/file/603b4d593758/Lib/socket.py#l535>`_.
+ None will set an infinite timeout.
+
+ :type read: integer, float, or None
+
+ :param total:
+ The maximum amount of time to wait for an HTTP request to connect and
+ return. This combines the connect and read timeouts into one. In the
+ event that both a connect timeout and a total are specified, or a read
+ timeout and a total are specified, the shorter timeout will be applied.
+
+ Defaults to None.
+
+
+ :type total: integer, float, or None
+
+ .. note::
+
+ Many factors can affect the total amount of time for urllib3 to return
+ an HTTP response. Specifically, Python's DNS resolver does not obey the
+ timeout specified on the socket. Other factors that can affect total
+ request time include high CPU load, high swap, the program running at a
+ low priority level, or other behaviors. The observed running time for
+ urllib3 to return a response may be greater than the value passed to
+ `total`.
+
+ In addition, the read and total timeouts only measure the time between
+ read operations on the socket connecting the client and the server, not
+ the total amount of time for the request to return a complete response.
+ As an example, you may want a request to return within 7 seconds or
+ fail, so you set the ``total`` timeout to 7 seconds. If the server
+        sends one byte to you every 5 seconds, the request will **not** time
+        out. This case is admittedly rare.
+ """
+
+ #: A sentinel object representing the default timeout value
+ DEFAULT_TIMEOUT = _GLOBAL_DEFAULT_TIMEOUT
+
+ def __init__(self, connect=_Default, read=_Default, total=None):
+ self._connect = self._validate_timeout(connect, 'connect')
+ self._read = self._validate_timeout(read, 'read')
+ self.total = self._validate_timeout(total, 'total')
+ self._start_connect = None
+
+ def __str__(self):
+ return '%s(connect=%r, read=%r, total=%r)' % (
+ type(self).__name__, self._connect, self._read, self.total)
+
+
+ @classmethod
+ def _validate_timeout(cls, value, name):
+ """ Check that a timeout attribute is valid
+
+ :param value: The timeout value to validate
+ :param name: The name of the timeout attribute to validate. This is used
+ for clear error messages
+ :return: the value
+ :raises ValueError: if the type is not an integer or a float, or if it
+ is a numeric value less than zero
+ """
+ if value is _Default:
+ return cls.DEFAULT_TIMEOUT
+
+ if value is None or value is cls.DEFAULT_TIMEOUT:
+ return value
+
+ try:
+ float(value)
+ except (TypeError, ValueError):
+ raise ValueError("Timeout value %s was %s, but it must be an "
+ "int or float." % (name, value))
+
+ try:
+ if value < 0:
+ raise ValueError("Attempted to set %s timeout to %s, but the "
+ "timeout cannot be set to a value less "
+ "than 0." % (name, value))
+ except TypeError: # Python 3
+ raise ValueError("Timeout value %s was %s, but it must be an "
+ "int or float." % (name, value))
+
+ return value
+
+ @classmethod
+ def from_float(cls, timeout):
+ """ Create a new Timeout from a legacy timeout value.
+
+ The timeout value used by httplib.py sets the same timeout on the
+        connect() and recv() socket operations. This creates a :class:`Timeout`
+ object that sets the individual timeouts to the ``timeout`` value passed
+ to this function.
+
+ :param timeout: The legacy timeout value
+ :type timeout: integer, float, sentinel default object, or None
+ :return: a Timeout object
+ :rtype: :class:`Timeout`
+ """
+ return Timeout(read=timeout, connect=timeout)
+
+ def clone(self):
+ """ Create a copy of the timeout object
+
+ Timeout properties are stored per-pool but each request needs a fresh
+ Timeout object to ensure each one has its own start/stop configured.
+
+ :return: a copy of the timeout object
+ :rtype: :class:`Timeout`
+ """
+ # We can't use copy.deepcopy because that will also create a new object
+ # for _GLOBAL_DEFAULT_TIMEOUT, which socket.py uses as a sentinel to
+ # detect the user default.
+ return Timeout(connect=self._connect, read=self._read,
+ total=self.total)
+
+ def start_connect(self):
+ """ Start the timeout clock, used during a connect() attempt
+
+ :raises urllib3.exceptions.TimeoutStateError: if you attempt
+ to start a timer that has been started already.
+ """
+ if self._start_connect is not None:
+ raise TimeoutStateError("Timeout timer has already been started.")
+ self._start_connect = current_time()
+ return self._start_connect
+
+ def get_connect_duration(self):
+ """ Gets the time elapsed since the call to :meth:`start_connect`.
+
+ :return: the elapsed time
+ :rtype: float
+ :raises urllib3.exceptions.TimeoutStateError: if you attempt
+ to get duration for a timer that hasn't been started.
+ """
+ if self._start_connect is None:
+ raise TimeoutStateError("Can't get connect duration for timer "
+ "that has not started.")
+ return current_time() - self._start_connect
+
+ @property
+ def connect_timeout(self):
+ """ Get the value to use when setting a connection timeout.
+
+ This will be a positive float or integer, the value None
+ (never timeout), or the default system timeout.
+
+ :return: the connect timeout
+ :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None
+ """
+ if self.total is None:
+ return self._connect
+
+ if self._connect is None or self._connect is self.DEFAULT_TIMEOUT:
+ return self.total
+
+ return min(self._connect, self.total)
+
+ @property
+ def read_timeout(self):
+ """ Get the value for the read timeout.
+
+ This assumes some time has elapsed in the connection timeout and
+ computes the read timeout appropriately.
+
+ If self.total is set, the read timeout is dependent on the amount of
+ time taken by the connect timeout. If the connection time has not been
+ established, a :exc:`~urllib3.exceptions.TimeoutStateError` will be
+ raised.
+
+ :return: the value to use for the read timeout
+ :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None
+ :raises urllib3.exceptions.TimeoutStateError: If :meth:`start_connect`
+ has not yet been called on this object.
+ """
+ if (self.total is not None and
+ self.total is not self.DEFAULT_TIMEOUT and
+ self._read is not None and
+ self._read is not self.DEFAULT_TIMEOUT):
+ # in case the connect timeout has not yet been established.
+ if self._start_connect is None:
+ return self._read
+ return max(0, min(self.total - self.get_connect_duration(),
+ self._read))
+ elif self.total is not None and self.total is not self.DEFAULT_TIMEOUT:
+ return max(0, self.total - self.get_connect_duration())
+ else:
+ return self._read
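The connect/read/total interplay above is easiest to see by poking at a
``Timeout`` directly; a sketch where ``time.sleep`` stands in for real
connection latency:

.. code-block:: python

    import time
    from urllib3.util import Timeout

    t = Timeout(connect=2.0, read=7.0, total=5.0)
    print(t.connect_timeout)  # 2.0, i.e. min(connect, total)

    t = t.clone()             # each request needs a fresh timer
    t.start_connect()
    time.sleep(1)
    # Roughly min(total - elapsed, read) == min(4.0, 7.0) == ~4.0
    print(t.read_timeout)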
class Url(namedtuple('Url', ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment'])):
@@ -61,6 +287,13 @@ class Url(namedtuple('Url', ['scheme', 'auth', 'host', 'port', 'path', 'query',
return uri
+ @property
+ def netloc(self):
+ """Network location including host and port"""
+ if self.port:
+ return '%s:%d' % (self.host, self.port)
+ return self.host
+
def split_first(s, delims):
"""
@@ -114,7 +347,7 @@ def parse_url(url):
# While this code has overlap with stdlib's urlparse, it is much
# simplified for our needs and less annoying.
- # Additionally, this imeplementations does silly things to be optimal
+    # Additionally, this implementation does silly things to be optimal
# on CPython.
scheme = None
@@ -143,7 +376,8 @@ def parse_url(url):
# IPv6
if url and url[0] == '[':
- host, url = url[1:].split(']', 1)
+ host, url = url.split(']', 1)
+ host += ']'
# Port
if ':' in url:
@@ -341,6 +575,20 @@ def assert_fingerprint(cert, fingerprint):
.format(hexlify(fingerprint_bytes),
hexlify(cert_digest)))
+def is_fp_closed(obj):
+ """
+ Checks whether a given file-like object is closed.
+
+ :param obj:
+ The file-like object to check.
+ """
+ if hasattr(obj, 'fp'):
+ # Object is a container for another file-like object that gets released
+ # on exhaustion (e.g. HTTPResponse)
+ return obj.fp is None
+
+ return obj.closed
+
if SSLContext is not None: # Python 3.2+
def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None,