author     SVN-Git Migration <python-modules-team@lists.alioth.debian.org>  2015-10-08 13:19:36 -0700
committer  SVN-Git Migration <python-modules-team@lists.alioth.debian.org>  2015-10-08 13:19:36 -0700
commit     5f949ee35667a6065ab02a3e7ab8c98c9fcdcaed (patch)
tree       35ce945e6f6fe74276a6745c96e6a48f6e5d3c68
parent     52980ebd0a4eb75acf055a2256e095772c1fa7c6 (diff)
download   python-urllib3-5f949ee35667a6065ab02a3e7ab8c98c9fcdcaed.tar
           python-urllib3-5f949ee35667a6065ab02a3e7ab8c98c9fcdcaed.tar.gz

Imported Upstream version 1.8
-rw-r--r--   CHANGES.rst                                              54
-rw-r--r--   CONTRIBUTORS.txt                                         21
-rw-r--r--   PKG-INFO                                                 58
-rwxr-xr-x   dummyserver/server.py                                   159
-rw-r--r--   dummyserver/testcase.py                                  63
-rw-r--r--   test-requirements.txt                                     2
-rw-r--r--   test/__init__.py                                          0
-rw-r--r--   test/benchmark.py                                        77
-rw-r--r--   test/test_collections.py                                 57
-rw-r--r--   test/test_compatibility.py                               23
-rw-r--r--   test/test_connectionpool.py                              24
-rw-r--r--   test/test_exceptions.py                                  45
-rw-r--r--   test/test_fields.py                                      43
-rw-r--r--   test/test_filepost.py                                     2
-rw-r--r--   test/test_response.py                                   113
-rw-r--r--   test/test_util.py                                        28
-rw-r--r--   urllib3.egg-info/PKG-INFO                                58
-rw-r--r--   urllib3.egg-info/SOURCES.txt                              7
-rw-r--r--   urllib3/__init__.py                                       2
-rw-r--r--   urllib3/_collections.py                                 117
-rw-r--r--   urllib3/connection.py                                   195
-rw-r--r--   urllib3/connectionpool.py                               290
-rw-r--r--   urllib3/contrib/pyopenssl.py                             85
-rw-r--r--   urllib3/exceptions.py                                     5
-rw-r--r--   urllib3/filepost.py                                      11
-rw-r--r--   urllib3/packages/ssl_match_hostname/__init__.py         111
-rw-r--r--   urllib3/packages/ssl_match_hostname/_implementation.py  105
-rw-r--r--   urllib3/poolmanager.py                                   11
-rw-r--r--   urllib3/request.py                                        1
-rw-r--r--   urllib3/response.py                                      29
-rw-r--r--   urllib3/util.py                                          54
31 files changed, 1312 insertions, 538 deletions
diff --git a/CHANGES.rst b/CHANGES.rst
index 891fd79..ae63682 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -1,10 +1,62 @@
Changes
=======
+1.8 (2014-03-04)
+++++++++++++++++
+
+* Improved url parsing in ``urllib3.util.parse_url`` (properly parse '@' in
+ username, and blank ports like 'hostname:').
+
+* New ``urllib3.connection`` module which contains all the HTTPConnection
+ objects.
+
+* Several ``urllib3.util.Timeout``-related fixes. Also changed constructor
+ signature to a more sensible order. [Backwards incompatible]
+ (Issues #252, #262, #263)
+
+* Use ``backports.ssl_match_hostname`` if it's installed. (Issue #274)
+
+* Added ``.tell()`` method to ``urllib3.response.HTTPResponse`` which
+ returns the number of bytes read so far. (Issue #277)
+
+* Support for platforms without threading. (Issue #289)
+
+* Expand default-port comparison in ``HTTPConnectionPool.is_same_host``
+ to allow a pool with no specified port to be considered equal to an
+ HTTP/HTTPS url with port 80/443 explicitly provided. (Issue #305)
+
+* Improved default SSL/TLS settings to avoid vulnerabilities.
+ (Issue #309)
+
+* Fixed ``urllib3.poolmanager.ProxyManager`` not retrying on connect errors.
+ (Issue #310)
+
+* Disable Nagle's Algorithm on the socket for non-proxies. A subset of requests
+ will send the entire HTTP request ~200 milliseconds faster; however, some of
+ the resulting TCP packets will be smaller. (Issue #254)
+
+* Increased maximum number of SubjectAltNames in ``urllib3.contrib.pyopenssl``
+ from the default 64 to 1024 in a single certificate. (Issue #318)
+
+* Headers are now passed and stored as a custom
+ ``urllib3._collections.HTTPHeaderDict`` object rather than a plain ``dict``.
+ (Issue #329, #333)
+
+* Headers no longer lose their case on Python 3. (Issue #236)
+
+* ``urllib3.contrib.pyopenssl`` now uses the operating system's default CA
+ certificates on inject. (Issue #332)
+
+* Requests with ``retries=False`` will immediately raise any exceptions without
+ wrapping them in ``MaxRetryError``. (Issue #348)
+
+* Fixed open socket leak with SSL-related failures. (Issue #344, #348)
+
+
1.7.1 (2013-09-25)
++++++++++++++++++
-* Added granular timeout support with new `urllib3.util.Timeout` class.
+* Added granular timeout support with new ``urllib3.util.Timeout`` class.
(Issue #231)
* Fixed Python 3.4 support. (Issue #238)
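The ``retries=False`` entry above changes how failures propagate. An
illustrative sketch (not part of the patch; the hostname is deliberately
unresolvable):

    import urllib3
    from urllib3.exceptions import MaxRetryError

    pool = urllib3.connection_from_url('http://nonexistent.invalid')

    # Default: connection failures are retried, then wrapped.
    try:
        pool.request('GET', '/')
    except MaxRetryError as e:
        print('wrapped after retries:', e)

    # With retries=False the failure is raised immediately, without
    # MaxRetryError wrapping.
    try:
        pool.request('GET', '/', retries=False)
    except Exception as e:
        print('raised directly:', type(e).__name__)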
diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt
index e1aca42..e2dba35 100644
--- a/CONTRIBUTORS.txt
+++ b/CONTRIBUTORS.txt
@@ -90,5 +90,26 @@ In chronological order:
* Kevin Burke <kev@inburke.com> and Pavel Kirichenko <juanych@yandex-team.ru>
* Support for separate connect and request timeouts
+* Peter Waller <p@pwaller.net>
+ * HTTPResponse.tell() for determining amount received over the wire
+
+* Nipunn Koorapati <nipunn1313@gmail.com>
+ * Ignore default ports when comparing hosts for equality
+
+* Danilo @dbrgn <http://dbrgn.ch/>
+ * Disabled TLS compression by default on Python 3.2+
+ * Disabled TLS compression in pyopenssl contrib module
+ * Configurable cipher suites in pyopenssl contrib module
+
+* Roman Bogorodskiy <roman.bogorodskiy@ericsson.com>
+ * Account retries on proxy errors
+
+* Nicolas Delaby <nicolas.delaby@ezeep.com>
+ * Use the platform-specific CA certificate locations
+
+* Josh Schneier <https://github.com/jschneier>
+ * HTTPHeaderDict and associated tests and docs
+ * Bugfixes, docs, test coverage
+
* [Your name or handle] <[email or website]>
* [Brief summary of your changes]
diff --git a/PKG-INFO b/PKG-INFO
index a81ab9c..6a4f31a 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
-Metadata-Version: 1.0
+Metadata-Version: 1.1
Name: urllib3
-Version: 1.7.1
+Version: 1.8
Summary: HTTP library with thread-safe connection pooling, file post, and more.
Home-page: http://urllib3.readthedocs.org/
Author: Andrey Petrov
@@ -121,10 +121,62 @@ Description: =======
Changes
=======
+ 1.8 (2014-03-04)
+ ++++++++++++++++
+
+ * Improved url parsing in ``urllib3.util.parse_url`` (properly parse '@' in
+ username, and blank ports like 'hostname:').
+
+ * New ``urllib3.connection`` module which contains all the HTTPConnection
+ objects.
+
+ * Several ``urllib3.util.Timeout``-related fixes. Also changed constructor
+ signature to a more sensible order. [Backwards incompatible]
+ (Issues #252, #262, #263)
+
+ * Use ``backports.ssl_match_hostname`` if it's installed. (Issue #274)
+
+ * Added ``.tell()`` method to ``urllib3.response.HTTPResponse`` which
+ returns the number of bytes read so far. (Issue #277)
+
+ * Support for platforms without threading. (Issue #289)
+
+ * Expand default-port comparison in ``HTTPConnectionPool.is_same_host``
+ to allow a pool with no specified port to be considered equal to an
+ HTTP/HTTPS url with port 80/443 explicitly provided. (Issue #305)
+
+ * Improved default SSL/TLS settings to avoid vulnerabilities.
+ (Issue #309)
+
+ * Fixed ``urllib3.poolmanager.ProxyManager`` not retrying on connect errors.
+ (Issue #310)
+
+ * Disable Nagle's Algorithm on the socket for non-proxies. A subset of requests
+ will send the entire HTTP request ~200 milliseconds faster; however, some of
+ the resulting TCP packets will be smaller. (Issue #254)
+
+ * Increased maximum number of SubjectAltNames in ``urllib3.contrib.pyopenssl``
+ from the default 64 to 1024 in a single certificate. (Issue #318)
+
+ * Headers are now passed and stored as a custom
+ ``urllib3._collections.HTTPHeaderDict`` object rather than a plain ``dict``.
+ (Issue #329, #333)
+
+ * Headers no longer lose their case on Python 3. (Issue #236)
+
+ * ``urllib3.contrib.pyopenssl`` now uses the operating system's default CA
+ certificates on inject. (Issue #332)
+
+ * Requests with ``retries=False`` will immediately raise any exceptions without
+ wrapping them in ``MaxRetryError``. (Issue #348)
+
+ * Fixed open socket leak with SSL-related failures. (Issue #344, #348)
+
+
1.7.1 (2013-09-25)
++++++++++++++++++
- * Added granular timeout support with new `urllib3.util.Timeout` class.
+ * Added granular timeout support with new ``urllib3.util.Timeout`` class.
(Issue #231)
* Fixed Python 3.4 support. (Issue #238)
diff --git a/dummyserver/server.py b/dummyserver/server.py
index f4f98a4..22de456 100755
--- a/dummyserver/server.py
+++ b/dummyserver/server.py
@@ -5,21 +5,21 @@ Dummy server used for unit testing.
"""
from __future__ import print_function
+import errno
import logging
import os
+import random
+import string
import sys
import threading
import socket
-from tornado import netutil
+from tornado.platform.auto import set_close_exec
import tornado.wsgi
import tornado.httpserver
import tornado.ioloop
import tornado.web
-from dummyserver.handlers import TestingApp
-from dummyserver.proxy import ProxyHandler
-
log = logging.getLogger(__name__)
@@ -51,7 +51,7 @@ class SocketServerThread(threading.Thread):
self.ready_event = ready_event
def _start_server(self):
- sock = socket.socket()
+ sock = socket.socket(socket.AF_INET6)
if sys.platform != 'win32':
sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
sock.bind((self.host, 0))
@@ -70,59 +70,112 @@ class SocketServerThread(threading.Thread):
self.server = self._start_server()
-class TornadoServerThread(threading.Thread):
- app = tornado.wsgi.WSGIContainer(TestingApp())
+# FIXME: there is a pull request patching bind_sockets in Tornado directly.
+# If it gets merged and released we can drop this and use
+# `tornado.netutil.bind_sockets` again.
+# https://github.com/facebook/tornado/pull/977
- def __init__(self, host='localhost', scheme='http', certs=None,
- ready_event=None):
- threading.Thread.__init__(self)
+def bind_sockets(port, address=None, family=socket.AF_UNSPEC, backlog=128,
+ flags=None):
+ """Creates listening sockets bound to the given port and address.
- self.host = host
- self.scheme = scheme
- self.certs = certs
- self.ready_event = ready_event
+ Returns a list of socket objects (multiple sockets are returned if
+ the given address maps to multiple IP addresses, which is most common
+ for mixed IPv4 and IPv6 use).
- def _start_server(self):
- if self.scheme == 'https':
- http_server = tornado.httpserver.HTTPServer(self.app,
- ssl_options=self.certs)
- else:
- http_server = tornado.httpserver.HTTPServer(self.app)
+ Address may be either an IP address or hostname. If it's a hostname,
+ the server will listen on all IP addresses associated with the
+ name. Address may be an empty string or None to listen on all
+ available interfaces. Family may be set to either `socket.AF_INET`
+ or `socket.AF_INET6` to restrict to IPv4 or IPv6 addresses, otherwise
+ both will be used if available.
- family = socket.AF_INET6 if ':' in self.host else socket.AF_INET
- sock, = netutil.bind_sockets(None, address=self.host, family=family)
- self.port = sock.getsockname()[1]
- http_server.add_sockets([sock])
- return http_server
+ The ``backlog`` argument has the same meaning as for
+ `socket.listen() <socket.socket.listen>`.
- def run(self):
- self.ioloop = tornado.ioloop.IOLoop.instance()
- self.server = self._start_server()
- if self.ready_event:
- self.ready_event.set()
- self.ioloop.start()
-
- def stop(self):
- self.ioloop.add_callback(self.server.stop)
- self.ioloop.add_callback(self.ioloop.stop)
-
-
-class ProxyServerThread(TornadoServerThread):
- app = tornado.web.Application([(r'.*', ProxyHandler)])
-
-
-if __name__ == '__main__':
- log.setLevel(logging.DEBUG)
- log.addHandler(logging.StreamHandler(sys.stderr))
-
- from urllib3 import get_host
+ ``flags`` is a bitmask of AI_* flags to `~socket.getaddrinfo`, like
+ ``socket.AI_PASSIVE | socket.AI_NUMERICHOST``.
+ """
+ sockets = []
+ if address == "":
+ address = None
+ if not socket.has_ipv6 and family == socket.AF_UNSPEC:
+ # Python can be compiled with --disable-ipv6, which causes
+ # operations on AF_INET6 sockets to fail, but does not
+ # automatically exclude those results from getaddrinfo
+ # results.
+ # http://bugs.python.org/issue16208
+ family = socket.AF_INET
+ if flags is None:
+ flags = socket.AI_PASSIVE
+ binded_port = None
+ for res in set(socket.getaddrinfo(address, port, family,
+ socket.SOCK_STREAM, 0, flags)):
+ af, socktype, proto, canonname, sockaddr = res
+ try:
+ sock = socket.socket(af, socktype, proto)
+ except socket.error as e:
+ if e.args[0] == errno.EAFNOSUPPORT:
+ continue
+ raise
+ set_close_exec(sock.fileno())
+ if os.name != 'nt':
+ sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
+ if af == socket.AF_INET6:
+ # On linux, ipv6 sockets accept ipv4 too by default,
+ # but this makes it impossible to bind to both
+ # 0.0.0.0 in ipv4 and :: in ipv6. On other systems,
+ # separate sockets *must* be used to listen for both ipv4
+ # and ipv6. For consistency, always disable ipv4 on our
+ # ipv6 sockets and use a separate ipv4 socket when needed.
+ #
+ # Python 2.x on windows doesn't have IPPROTO_IPV6.
+ if hasattr(socket, "IPPROTO_IPV6"):
+ sock.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 1)
+
+ # automatic port allocation with port=None
+ # should bind on the same port on IPv4 and IPv6
+ host, requested_port = sockaddr[:2]
+ if requested_port == 0 and binded_port is not None:
+ sockaddr = tuple([host, binded_port] + list(sockaddr[2:]))
+
+ sock.setblocking(0)
+ sock.bind(sockaddr)
+ binded_port = sock.getsockname()[1]
+ sock.listen(backlog)
+ sockets.append(sock)
+ return sockets
+
+
+def run_tornado_app(app, io_loop, certs, scheme, host):
+ if scheme == 'https':
+ http_server = tornado.httpserver.HTTPServer(app, ssl_options=certs,
+ io_loop=io_loop)
+ else:
+ http_server = tornado.httpserver.HTTPServer(app, io_loop=io_loop)
+
+ sockets = bind_sockets(None, address=host)
+ port = sockets[0].getsockname()[1]
+ http_server.add_sockets(sockets)
+ return http_server, port
+
+
+def run_loop_in_thread(io_loop):
+ t = threading.Thread(target=io_loop.start)
+ t.start()
+ return t
- url = "http://localhost:8081"
- if len(sys.argv) > 1:
- url = sys.argv[1]
- print("Starting WSGI server at: %s" % url)
+def get_unreachable_address():
+ while True:
+ host = ''.join(random.choice(string.ascii_lowercase)
+ for _ in range(60))
+ sockaddr = (host, 54321)
- scheme, host, port = get_host(url)
- t = TornadoServerThread(scheme=scheme, host=host, port=port)
- t.start()
+ # check if we are really "lucky" and hit an actual server
+ try:
+ s = socket.create_connection(sockaddr)
+ except socket.error:
+ return sockaddr
+ else:
+ s.close()
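Taken together, ``bind_sockets``, ``run_tornado_app`` and
``run_loop_in_thread`` replace the old ``TornadoServerThread``. A sketch of
how a caller wires them up (mirroring the testcase.py changes below):

    from tornado import ioloop, wsgi

    from dummyserver.server import run_tornado_app, run_loop_in_thread
    from dummyserver.handlers import TestingApp

    io_loop = ioloop.IOLoop()
    app = wsgi.WSGIContainer(TestingApp())

    # Binds to an OS-assigned port and serves the app on the loop.
    server, port = run_tornado_app(app, io_loop, None, 'http', 'localhost')
    server_thread = run_loop_in_thread(io_loop)

    # ... issue requests against http://localhost:<port> ...

    io_loop.add_callback(server.stop)
    io_loop.add_callback(io_loop.stop)
    server_thread.join()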
diff --git a/dummyserver/testcase.py b/dummyserver/testcase.py
index a2a1da1..35769ef 100644
--- a/dummyserver/testcase.py
+++ b/dummyserver/testcase.py
@@ -2,14 +2,17 @@ import unittest
import socket
import threading
from nose.plugins.skip import SkipTest
+from tornado import ioloop, web, wsgi
from dummyserver.server import (
- TornadoServerThread, SocketServerThread,
+ SocketServerThread,
+ run_tornado_app,
+ run_loop_in_thread,
DEFAULT_CERTS,
- ProxyServerThread,
)
+from dummyserver.handlers import TestingApp
+from dummyserver.proxy import ProxyHandler
-has_ipv6 = hasattr(socket, 'has_ipv6')
class SocketDummyServerTestCase(unittest.TestCase):
@@ -33,7 +36,7 @@ class SocketDummyServerTestCase(unittest.TestCase):
@classmethod
def tearDownClass(cls):
if hasattr(cls, 'server_thread'):
- cls.server_thread.join()
+ cls.server_thread.join(0.1)
class HTTPDummyServerTestCase(unittest.TestCase):
@@ -44,18 +47,16 @@ class HTTPDummyServerTestCase(unittest.TestCase):
@classmethod
def _start_server(cls):
- ready_event = threading.Event()
- cls.server_thread = TornadoServerThread(host=cls.host,
- scheme=cls.scheme,
- certs=cls.certs,
- ready_event=ready_event)
- cls.server_thread.start()
- ready_event.wait()
- cls.port = cls.server_thread.port
+ cls.io_loop = ioloop.IOLoop()
+ app = wsgi.WSGIContainer(TestingApp())
+ cls.server, cls.port = run_tornado_app(app, cls.io_loop, cls.certs,
+ cls.scheme, cls.host)
+ cls.server_thread = run_loop_in_thread(cls.io_loop)
@classmethod
def _stop_server(cls):
- cls.server_thread.stop()
+ cls.io_loop.add_callback(cls.server.stop)
+ cls.io_loop.add_callback(cls.io_loop.stop)
cls.server_thread.join()
@classmethod
@@ -87,27 +88,29 @@ class HTTPDummyProxyTestCase(unittest.TestCase):
@classmethod
def setUpClass(cls):
- cls.http_thread = TornadoServerThread(host=cls.http_host,
- scheme='http')
- cls.http_thread._start_server()
- cls.http_port = cls.http_thread.port
+ cls.io_loop = ioloop.IOLoop()
- cls.https_thread = TornadoServerThread(
- host=cls.https_host, scheme='https', certs=cls.https_certs)
- cls.https_thread._start_server()
- cls.https_port = cls.https_thread.port
+ app = wsgi.WSGIContainer(TestingApp())
+ cls.http_server, cls.http_port = run_tornado_app(
+ app, cls.io_loop, None, 'http', cls.http_host)
- ready_event = threading.Event()
- cls.proxy_thread = ProxyServerThread(
- host=cls.proxy_host, ready_event=ready_event)
- cls.proxy_thread.start()
- ready_event.wait()
- cls.proxy_port = cls.proxy_thread.port
+ app = wsgi.WSGIContainer(TestingApp())
+ cls.https_server, cls.https_port = run_tornado_app(
+ app, cls.io_loop, cls.https_certs, 'https', cls.http_host)
+
+ app = web.Application([(r'.*', ProxyHandler)])
+ cls.proxy_server, cls.proxy_port = run_tornado_app(
+ app, cls.io_loop, None, 'http', cls.proxy_host)
+
+ cls.server_thread = run_loop_in_thread(cls.io_loop)
@classmethod
def tearDownClass(cls):
- cls.proxy_thread.stop()
- cls.proxy_thread.join()
+ cls.io_loop.add_callback(cls.http_server.stop)
+ cls.io_loop.add_callback(cls.https_server.stop)
+ cls.io_loop.add_callback(cls.proxy_server.stop)
+ cls.io_loop.add_callback(cls.io_loop.stop)
+ cls.server_thread.join()
class IPv6HTTPDummyServerTestCase(HTTPDummyServerTestCase):
@@ -115,7 +118,7 @@ class IPv6HTTPDummyServerTestCase(HTTPDummyServerTestCase):
@classmethod
def setUpClass(cls):
- if not has_ipv6:
+ if not socket.has_ipv6:
raise SkipTest('IPv6 not available')
else:
super(IPv6HTTPDummyServerTestCase, cls).setUpClass()
diff --git a/test-requirements.txt b/test-requirements.txt
index f7c3a50..02d70f4 100644
--- a/test-requirements.txt
+++ b/test-requirements.txt
@@ -1,4 +1,4 @@
nose==1.3
mock==1.0.1
-tornado==2.4.1
+tornado==3.1.1
coverage==3.6
diff --git a/test/__init__.py b/test/__init__.py
deleted file mode 100644
index e69de29..0000000
--- a/test/__init__.py
+++ /dev/null
diff --git a/test/benchmark.py b/test/benchmark.py
deleted file mode 100644
index e7049c4..0000000
--- a/test/benchmark.py
+++ /dev/null
@@ -1,77 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Really simple rudimentary benchmark to compare ConnectionPool versus standard
-urllib to demonstrate the usefulness of connection re-using.
-"""
-from __future__ import print_function
-
-import sys
-import time
-import urllib
-
-sys.path.append('../')
-import urllib3
-
-
-# URLs to download. Doesn't matter as long as they're from the same host, so we
-# can take advantage of connection re-using.
-TO_DOWNLOAD = [
- 'http://code.google.com/apis/apps/',
- 'http://code.google.com/apis/base/',
- 'http://code.google.com/apis/blogger/',
- 'http://code.google.com/apis/calendar/',
- 'http://code.google.com/apis/codesearch/',
- 'http://code.google.com/apis/contact/',
- 'http://code.google.com/apis/books/',
- 'http://code.google.com/apis/documents/',
- 'http://code.google.com/apis/finance/',
- 'http://code.google.com/apis/health/',
- 'http://code.google.com/apis/notebook/',
- 'http://code.google.com/apis/picasaweb/',
- 'http://code.google.com/apis/spreadsheets/',
- 'http://code.google.com/apis/webmastertools/',
- 'http://code.google.com/apis/youtube/',
-]
-
-
-def urllib_get(url_list):
- assert url_list
- for url in url_list:
- now = time.time()
- r = urllib.urlopen(url)
- elapsed = time.time() - now
- print("Got in %0.3f: %s" % (elapsed, url))
-
-
-def pool_get(url_list):
- assert url_list
- pool = urllib3.connection_from_url(url_list[0])
- for url in url_list:
- now = time.time()
- r = pool.get_url(url)
- elapsed = time.time() - now
- print("Got in %0.3fs: %s" % (elapsed, url))
-
-
-if __name__ == '__main__':
- print("Running pool_get ...")
- now = time.time()
- pool_get(TO_DOWNLOAD)
- pool_elapsed = time.time() - now
-
- print("Running urllib_get ...")
- now = time.time()
- urllib_get(TO_DOWNLOAD)
- urllib_elapsed = time.time() - now
-
- print("Completed pool_get in %0.3fs" % pool_elapsed)
- print("Completed urllib_get in %0.3fs" % urllib_elapsed)
-
-
-"""
-Example results:
-
-Completed pool_get in 1.163s
-Completed urllib_get in 2.318s
-"""
diff --git a/test/test_collections.py b/test/test_collections.py
index b44c58a..4d173ac 100644
--- a/test/test_collections.py
+++ b/test/test_collections.py
@@ -1,6 +1,9 @@
import unittest
-from urllib3._collections import RecentlyUsedContainer as Container
+from urllib3._collections import (
+ HTTPHeaderDict,
+ RecentlyUsedContainer as Container
+)
from urllib3.packages import six
xrange = six.moves.xrange
@@ -121,5 +124,57 @@ class TestLRUContainer(unittest.TestCase):
self.assertRaises(NotImplementedError, d.__iter__)
+
+class TestHTTPHeaderDict(unittest.TestCase):
+ def setUp(self):
+ self.d = HTTPHeaderDict(A='foo')
+ self.d.add('a', 'bar')
+
+ def test_overwriting_with_setitem_replaces(self):
+ d = HTTPHeaderDict()
+
+ d['A'] = 'foo'
+ self.assertEqual(d['a'], 'foo')
+
+ d['a'] = 'bar'
+ self.assertEqual(d['A'], 'bar')
+
+ def test_copy(self):
+ h = self.d.copy()
+ self.assertTrue(self.d is not h)
+ self.assertEqual(self.d, h)
+
+ def test_add(self):
+ d = HTTPHeaderDict()
+
+ d['A'] = 'foo'
+ d.add('a', 'bar')
+
+ self.assertEqual(d['a'], 'foo, bar')
+ self.assertEqual(d['A'], 'foo, bar')
+
+ def test_getlist(self):
+ self.assertEqual(self.d.getlist('a'), ['foo', 'bar'])
+ self.assertEqual(self.d.getlist('A'), ['foo', 'bar'])
+ self.assertEqual(self.d.getlist('b'), [])
+
+ def test_delitem(self):
+ del self.d['a']
+ self.assertFalse('a' in self.d)
+ self.assertFalse('A' in self.d)
+
+ def test_equal(self):
+ b = HTTPHeaderDict({'a': 'foo, bar'})
+ self.assertEqual(self.d, b)
+ c = [('a', 'foo, bar')]
+ self.assertNotEqual(self.d, c)
+
+ def test_len(self):
+ self.assertEqual(len(self.d), 1)
+
+ def test_repr(self):
+ rep = "HTTPHeaderDict({'A': 'foo, bar'})"
+ self.assertEqual(repr(self.d), rep)
+
if __name__ == '__main__':
unittest.main()
diff --git a/test/test_compatibility.py b/test/test_compatibility.py
new file mode 100644
index 0000000..05ee4de
--- /dev/null
+++ b/test/test_compatibility.py
@@ -0,0 +1,23 @@
+import unittest
+import warnings
+
+from urllib3.connection import HTTPConnection
+
+
+class TestVersionCompatibility(unittest.TestCase):
+ def test_connection_strict(self):
+ with warnings.catch_warnings(record=True) as w:
+ warnings.simplefilter("always")
+
+ # strict=True is deprecated in Py33+
+ conn = HTTPConnection('localhost', 12345, strict=True)
+
+ if w:
+ self.fail('HTTPConnection raised warning on strict=True: %r' % w[0].message)
+
+ def test_connection_source_address(self):
+ try:
+ # source_address does not exist in Py26-
+ conn = HTTPConnection('localhost', 12345, source_address='127.0.0.1')
+ except TypeError as e:
+ self.fail('HTTPConnection raised TypeError on source_address: %r' % e)
diff --git a/test/test_connectionpool.py b/test/test_connectionpool.py
index ac1768e..02229cf 100644
--- a/test/test_connectionpool.py
+++ b/test/test_connectionpool.py
@@ -13,10 +13,9 @@ from urllib3.exceptions import (
HostChangedError,
MaxRetryError,
SSLError,
- ReadTimeoutError,
)
-from socket import error as SocketError, timeout as SocketTimeout
+from socket import error as SocketError
from ssl import SSLError as BaseSSLError
try: # Python 3
@@ -39,6 +38,11 @@ class TestConnectionPool(unittest.TestCase):
('http://google.com/', 'http://google.com'),
('http://google.com/', 'http://google.com/abra/cadabra'),
('http://google.com:42/', 'http://google.com:42/abracadabra'),
+ # Test comparison using default ports
+ ('http://google.com:80/', 'http://google.com/abracadabra'),
+ ('http://google.com/', 'http://google.com:80/abracadabra'),
+ ('https://google.com:443/', 'https://google.com/abracadabra'),
+ ('https://google.com/', 'https://google.com:443/abracadabra'),
]
for a, b in same_host:
@@ -51,11 +55,22 @@ class TestConnectionPool(unittest.TestCase):
('http://yahoo.com/', 'http://google.com/'),
('http://google.com:42', 'https://google.com/abracadabra'),
('http://google.com', 'https://google.net/'),
+ # Test comparison with default ports
+ ('http://google.com:42', 'http://google.com'),
+ ('https://google.com:42', 'https://google.com'),
+ ('http://google.com:443', 'http://google.com'),
+ ('https://google.com:80', 'https://google.com'),
+ ('http://google.com:443', 'https://google.com'),
+ ('https://google.com:80', 'http://google.com'),
+ ('https://google.com:443', 'http://google.com'),
+ ('http://google.com:80', 'https://google.com'),
]
for a, b in not_same_host:
c = connection_from_url(a)
self.assertFalse(c.is_same_host(b), "%s =? %s" % (a, b))
+ c = connection_from_url(b)
+ self.assertFalse(c.is_same_host(a), "%s =? %s" % (b, a))
def test_max_connections(self):
@@ -128,9 +143,8 @@ class TestConnectionPool(unittest.TestCase):
self.assertEqual(pool.pool.qsize(), POOL_SIZE)
- #make sure that all of the exceptions return the connection to the pool
- _test(Empty, ReadTimeoutError)
- _test(SocketTimeout, ReadTimeoutError)
+ # Make sure that all of the exceptions return the connection to the pool
+ _test(Empty, EmptyPoolError)
_test(BaseSSLError, SSLError)
_test(CertificateError, SSLError)
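The new default-port cases exercise the expanded ``is_same_host`` comparison
from the changelog; the behavior in brief (a sketch, using the same
``connection_from_url`` helper as the test):

    from urllib3.connectionpool import connection_from_url

    pool = connection_from_url('http://google.com/')

    # A pool created without a port now matches a URL that names the
    # scheme's default port explicitly, and vice versa.
    assert pool.is_same_host('http://google.com:80/')
    assert not pool.is_same_host('http://google.com:8080/')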
diff --git a/test/test_exceptions.py b/test/test_exceptions.py
index e20649b..4190a61 100644
--- a/test/test_exceptions.py
+++ b/test/test_exceptions.py
@@ -11,25 +11,36 @@ from urllib3.connectionpool import HTTPConnectionPool
class TestPickle(unittest.TestCase):
- def cycle(self, item):
+ def verify_pickling(self, item):
return pickle.loads(pickle.dumps(item))
def test_exceptions(self):
- assert self.cycle(HTTPError(None))
- assert self.cycle(MaxRetryError(None, None, None))
- assert self.cycle(LocationParseError(None))
- assert self.cycle(ConnectTimeoutError(None))
+ assert self.verify_pickling(HTTPError(None))
+ assert self.verify_pickling(MaxRetryError(None, None, None))
+ assert self.verify_pickling(LocationParseError(None))
+ assert self.verify_pickling(ConnectTimeoutError(None))
def test_exceptions_with_objects(self):
- assert self.cycle(HTTPError('foo'))
- assert self.cycle(MaxRetryError(HTTPConnectionPool('localhost'),
- '/', None))
- assert self.cycle(LocationParseError('fake location'))
- assert self.cycle(ClosedPoolError(HTTPConnectionPool('localhost'),
- None))
- assert self.cycle(EmptyPoolError(HTTPConnectionPool('localhost'),
- None))
- assert self.cycle(HostChangedError(HTTPConnectionPool('localhost'),
- '/', None))
- assert self.cycle(ReadTimeoutError(HTTPConnectionPool('localhost'),
- '/', None))
+ assert self.verify_pickling(
+ HTTPError('foo'))
+
+ assert self.verify_pickling(
+ HTTPError('foo', IOError('foo')))
+
+ assert self.verify_pickling(
+ MaxRetryError(HTTPConnectionPool('localhost'), '/', None))
+
+ assert self.verify_pickling(
+ LocationParseError('fake location'))
+
+ assert self.verify_pickling(
+ ClosedPoolError(HTTPConnectionPool('localhost'), None))
+
+ assert self.verify_pickling(
+ EmptyPoolError(HTTPConnectionPool('localhost'), None))
+
+ assert self.verify_pickling(
+ HostChangedError(HTTPConnectionPool('localhost'), '/', None))
+
+ assert self.verify_pickling(
+ ReadTimeoutError(HTTPConnectionPool('localhost'), '/', None))
diff --git a/test/test_fields.py b/test/test_fields.py
index 888c2d5..cdec68b 100644
--- a/test/test_fields.py
+++ b/test/test_fields.py
@@ -1,34 +1,39 @@
import unittest
from urllib3.fields import guess_content_type, RequestField
-from urllib3.packages.six import b, u
+from urllib3.packages.six import u
class TestRequestField(unittest.TestCase):
def test_guess_content_type(self):
- self.assertEqual(guess_content_type('image.jpg'), 'image/jpeg')
- self.assertEqual(guess_content_type('notsure'), 'application/octet-stream')
- self.assertEqual(guess_content_type(None), 'application/octet-stream')
+ self.assertTrue(guess_content_type('image.jpg') in
+ ['image/jpeg', 'image/pjpeg'])
+ self.assertEqual(guess_content_type('notsure'),
+ 'application/octet-stream')
+ self.assertEqual(guess_content_type(None), 'application/octet-stream')
def test_create(self):
- simple_field = RequestField('somename', 'data')
- self.assertEqual(simple_field.render_headers(), '\r\n')
- filename_field = RequestField('somename', 'data', filename='somefile.txt')
- self.assertEqual(filename_field.render_headers(), '\r\n')
- headers_field = RequestField('somename', 'data', headers={'Content-Length': 4})
- self.assertEqual(headers_field.render_headers(),
- 'Content-Length: 4\r\n'
- '\r\n')
+ simple_field = RequestField('somename', 'data')
+ self.assertEqual(simple_field.render_headers(), '\r\n')
+ filename_field = RequestField('somename', 'data',
+ filename='somefile.txt')
+ self.assertEqual(filename_field.render_headers(), '\r\n')
+ headers_field = RequestField('somename', 'data',
+ headers={'Content-Length': 4})
+ self.assertEqual(
+ headers_field.render_headers(), 'Content-Length: 4\r\n\r\n')
def test_make_multipart(self):
- field = RequestField('somename', 'data')
- field.make_multipart(content_type='image/jpg', content_location='/test')
- self.assertEqual(field.render_headers(),
- 'Content-Disposition: form-data; name="somename"\r\n'
- 'Content-Type: image/jpg\r\n'
- 'Content-Location: /test\r\n'
- '\r\n')
+ field = RequestField('somename', 'data')
+ field.make_multipart(content_type='image/jpg',
+ content_location='/test')
+ self.assertEqual(
+ field.render_headers(),
+ 'Content-Disposition: form-data; name="somename"\r\n'
+ 'Content-Type: image/jpg\r\n'
+ 'Content-Location: /test\r\n'
+ '\r\n')
def test_render_parts(self):
field = RequestField('somename', 'data')
diff --git a/test/test_filepost.py b/test/test_filepost.py
index ca33d61..390dbb3 100644
--- a/test/test_filepost.py
+++ b/test/test_filepost.py
@@ -124,7 +124,7 @@ class TestMultipartEncoding(unittest.TestCase):
encoded, content_type = encode_multipart_formdata(fields, boundary=BOUNDARY)
- self.assertEquals(encoded,
+ self.assertEqual(encoded,
b'--' + b(BOUNDARY) + b'\r\n'
b'Content-Type: image/jpeg\r\n'
b'\r\n'
diff --git a/test/test_response.py b/test/test_response.py
index 90d34eb..ecfcbee 100644
--- a/test/test_response.py
+++ b/test/test_response.py
@@ -5,6 +5,25 @@ from io import BytesIO, BufferedReader
from urllib3.response import HTTPResponse
from urllib3.exceptions import DecodeError
+
+from base64 import b64decode
+
+# A known random (i.e., not-too-compressible) payload generated with:
+# "".join(random.choice(string.printable) for i in xrange(512))
+# .encode("zlib").encode("base64")
+# Randomness in tests == bad, and fixing a seed may not be sufficient.
+ZLIB_PAYLOAD = b64decode(b"""\
+eJwFweuaoQAAANDfineQhiKLUiaiCzvuTEmNNlJGiL5QhnGpZ99z8luQfe1AHoMioB+QSWHQu/L+
+lzd7W5CipqYmeVTBjdgSATdg4l4Z2zhikbuF+EKn69Q0DTpdmNJz8S33odfJoVEexw/l2SS9nFdi
+pis7KOwXzfSqarSo9uJYgbDGrs1VNnQpT9f8zAorhYCEZronZQF9DuDFfNK3Hecc+WHLnZLQptwk
+nufw8S9I43sEwxsT71BiqedHo0QeIrFE01F/4atVFXuJs2yxIOak3bvtXjUKAA6OKnQJ/nNvDGKZ
+Khe5TF36JbnKVjdcL1EUNpwrWVfQpFYJ/WWm2b74qNeSZeQv5/xBhRdOmKTJFYgO96PwrHBlsnLn
+a3l0LwJsloWpMbzByU5WLbRE6X5INFqjQOtIwYz5BAlhkn+kVqJvWM5vBlfrwP42ifonM5yF4ciJ
+auHVks62997mNGOsM7WXNG3P98dBHPo2NhbTvHleL0BI5dus2JY81MUOnK3SGWLH8HeWPa1t5KcW
+S5moAj5HexY/g/F8TctpxwsvyZp38dXeLDjSQvEQIkF7XR3YXbeZgKk3V34KGCPOAeeuQDIgyVhV
+nP4HF2uWHA==""")
+
+
class TestLegacyResponse(unittest.TestCase):
def test_getheaders(self):
headers = {'host': 'example.com'}
@@ -167,6 +186,23 @@ class TestResponse(unittest.TestCase):
self.assertEqual(next(stream), b'o')
self.assertRaises(StopIteration, next, stream)
+ def test_streaming_tell(self):
+ fp = BytesIO(b'foo')
+ resp = HTTPResponse(fp, preload_content=False)
+ stream = resp.stream(2, decode_content=False)
+
+ position = 0
+
+ position += len(next(stream))
+ self.assertEqual(2, position)
+ self.assertEqual(position, resp.tell())
+
+ position += len(next(stream))
+ self.assertEqual(3, position)
+ self.assertEqual(position, resp.tell())
+
+ self.assertRaises(StopIteration, next, stream)
+
def test_gzipped_streaming(self):
import zlib
compress = zlib.compressobj(6, zlib.DEFLATED, 16 + zlib.MAX_WBITS)
@@ -182,6 +218,78 @@ class TestResponse(unittest.TestCase):
self.assertEqual(next(stream), b'oo')
self.assertRaises(StopIteration, next, stream)
+ def test_gzipped_streaming_tell(self):
+ import zlib
+ compress = zlib.compressobj(6, zlib.DEFLATED, 16 + zlib.MAX_WBITS)
+ uncompressed_data = b'foo'
+ data = compress.compress(uncompressed_data)
+ data += compress.flush()
+
+ fp = BytesIO(data)
+ resp = HTTPResponse(fp, headers={'content-encoding': 'gzip'},
+ preload_content=False)
+ stream = resp.stream()
+
+ # Read everything
+ payload = next(stream)
+ self.assertEqual(payload, uncompressed_data)
+
+ self.assertEqual(len(data), resp.tell())
+
+ self.assertRaises(StopIteration, next, stream)
+
+ def test_deflate_streaming_tell_intermediate_point(self):
+ # Ensure that ``tell()`` returns the correct number of bytes when
+ # part-way through streaming compressed content.
+ import zlib
+
+ NUMBER_OF_READS = 10
+
+ class MockCompressedDataReading(BytesIO):
+ """
+ A BytesIO-like reader returning ``payload`` in ``NUMBER_OF_READS``
+ calls to ``read``.
+ """
+
+ def __init__(self, payload, payload_part_size):
+ self.payloads = [
+ payload[i*payload_part_size:(i+1)*payload_part_size]
+ for i in range(NUMBER_OF_READS+1)]
+
+ assert b"".join(self.payloads) == payload
+
+ def read(self, _):
+ # Amount is unused.
+ if len(self.payloads) > 0:
+ return self.payloads.pop(0)
+ return b""
+
+ uncompressed_data = zlib.decompress(ZLIB_PAYLOAD)
+
+ payload_part_size = len(ZLIB_PAYLOAD) // NUMBER_OF_READS
+ fp = MockCompressedDataReading(ZLIB_PAYLOAD, payload_part_size)
+ resp = HTTPResponse(fp, headers={'content-encoding': 'deflate'},
+ preload_content=False)
+ stream = resp.stream()
+
+ parts_positions = [(part, resp.tell()) for part in stream]
+ end_of_stream = resp.tell()
+
+ self.assertRaises(StopIteration, next, stream)
+
+ parts, positions = zip(*parts_positions)
+
+ # Check that the payload is equal to the uncompressed data
+ payload = b"".join(parts)
+ self.assertEqual(uncompressed_data, payload)
+
+ # Check that the positions in the stream are correct
+ expected = [(i+1)*payload_part_size for i in range(NUMBER_OF_READS)]
+ self.assertEqual(expected, list(positions))
+
+ # Check that the end of the stream is in the correct place
+ self.assertEqual(len(ZLIB_PAYLOAD), end_of_stream)
+
def test_deflate_streaming(self):
import zlib
data = zlib.compress(b'foo')
@@ -244,6 +352,11 @@ class TestResponse(unittest.TestCase):
self.assertEqual(next(stream), b'o')
self.assertRaises(StopIteration, next, stream)
+ def test_get_case_insensitive_headers(self):
+ headers = {'host': 'example.com'}
+ r = HTTPResponse(headers=headers)
+ self.assertEqual(r.headers.get('host'), 'example.com')
+ self.assertEqual(r.headers.get('Host'), 'example.com')
if __name__ == '__main__':
unittest.main()
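The streaming ``tell()`` tests above cover the new ``HTTPResponse.tell()``
from the changelog. Its essence in a few lines (a sketch):

    from io import BytesIO
    from urllib3.response import HTTPResponse

    resp = HTTPResponse(BytesIO(b'foo'), preload_content=False)
    resp.read(2)

    # tell() reports bytes fetched over the wire; for compressed
    # responses this can differ from the decoded length.
    assert resp.tell() == 2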
diff --git a/test/test_util.py b/test/test_util.py
index b465fef..ebd3b5f 100644
--- a/test/test_util.py
+++ b/test/test_util.py
@@ -64,7 +64,7 @@ class TestUtil(unittest.TestCase):
}
for url, expected_host in url_host_map.items():
returned_host = get_host(url)
- self.assertEquals(returned_host, expected_host)
+ self.assertEqual(returned_host, expected_host)
def test_invalid_host(self):
# TODO: Add more tests
@@ -85,6 +85,8 @@ class TestUtil(unittest.TestCase):
'http://google.com/': Url('http', host='google.com', path='/'),
'http://google.com': Url('http', host='google.com'),
'http://google.com?foo': Url('http', host='google.com', path='', query='foo'),
+
+ # Path/query/fragment
'': Url(),
'/': Url(path='/'),
'?': Url(path='', query=''),
@@ -93,10 +95,22 @@ class TestUtil(unittest.TestCase):
'/foo': Url(path='/foo'),
'/foo?bar=baz': Url(path='/foo', query='bar=baz'),
'/foo?bar=baz#banana?apple/orange': Url(path='/foo', query='bar=baz', fragment='banana?apple/orange'),
+
+ # Port
+ 'http://google.com/': Url('http', host='google.com', path='/'),
+ 'http://google.com:80/': Url('http', host='google.com', port=80, path='/'),
+ 'http://google.com:/': Url('http', host='google.com', path='/'),
+ 'http://google.com:80': Url('http', host='google.com', port=80),
+ 'http://google.com:': Url('http', host='google.com'),
+
+ # Auth
+ 'http://foo:bar@localhost/': Url('http', auth='foo:bar', host='localhost', path='/'),
+ 'http://foo@localhost/': Url('http', auth='foo', host='localhost', path='/'),
+ 'http://foo:bar@baz@localhost/': Url('http', auth='foo:bar@baz', host='localhost', path='/'),
}
for url, expected_url in url_host_map.items():
returned_url = parse_url(url)
- self.assertEquals(returned_url, expected_url)
+ self.assertEqual(returned_url, expected_url)
def test_parse_url_invalid_IPv6(self):
self.assertRaises(ValueError, parse_url, '[::1')
@@ -115,7 +129,7 @@ class TestUtil(unittest.TestCase):
}
for url, expected_request_uri in url_host_map.items():
returned_url = parse_url(url)
- self.assertEquals(returned_url.request_uri, expected_request_uri)
+ self.assertEqual(returned_url.request_uri, expected_request_uri)
def test_netloc(self):
url_netloc_map = {
@@ -126,7 +140,7 @@ class TestUtil(unittest.TestCase):
}
for url, expected_netloc in url_netloc_map.items():
- self.assertEquals(parse_url(url).netloc, expected_netloc)
+ self.assertEqual(parse_url(url).netloc, expected_netloc)
def test_make_headers(self):
self.assertEqual(
@@ -157,6 +171,9 @@ class TestUtil(unittest.TestCase):
make_headers(basic_auth='foo:bar'),
{'authorization': 'Basic Zm9vOmJhcg=='})
+ self.assertEqual(
+ make_headers(proxy_basic_auth='foo:bar'),
+ {'proxy-authorization': 'Basic Zm9vOmJhcg=='})
def test_split_first(self):
test_cases = {
@@ -250,6 +267,9 @@ class TestUtil(unittest.TestCase):
self.assertEqual(timeout.read_timeout, None)
self.assertEqual(timeout.total, None)
+ timeout = Timeout(5)
+ self.assertEqual(timeout.total, 5)
+
def test_timeout_str(self):
timeout = Timeout(connect=1, read=2, total=3)
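These cases reflect the reordered ``Timeout`` constructor called out in the
changelog; typical usage looks like this (a sketch with illustrative values):

    from urllib3 import HTTPConnectionPool
    from urllib3.util import Timeout

    # A bare number is now the total time budget for the request.
    t = Timeout(5)
    assert t.total == 5

    # Connect and read phases can still be bounded individually.
    pool = HTTPConnectionPool('localhost', 8080,
                              timeout=Timeout(connect=1, read=2, total=3))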
diff --git a/urllib3.egg-info/PKG-INFO b/urllib3.egg-info/PKG-INFO
index a81ab9c..6a4f31a 100644
--- a/urllib3.egg-info/PKG-INFO
+++ b/urllib3.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
-Metadata-Version: 1.0
+Metadata-Version: 1.1
Name: urllib3
-Version: 1.7.1
+Version: 1.8
Summary: HTTP library with thread-safe connection pooling, file post, and more.
Home-page: http://urllib3.readthedocs.org/
Author: Andrey Petrov
@@ -121,10 +121,62 @@ Description: =======
Changes
=======
+ 1.8 (2014-03-04)
+ ++++++++++++++++
+
+ * Improved url parsing in ``urllib3.util.parse_url`` (properly parse '@' in
+ username, and blank ports like 'hostname:').
+
+ * New ``urllib3.connection`` module which contains all the HTTPConnection
+ objects.
+
+ * Several ``urllib3.util.Timeout``-related fixes. Also changed constructor
+ signature to a more sensible order. [Backwards incompatible]
+ (Issues #252, #262, #263)
+
+ * Use ``backports.ssl_match_hostname`` if it's installed. (Issue #274)
+
+ * Added ``.tell()`` method to ``urllib3.response.HTTPResponse`` which
+ returns the number of bytes read so far. (Issue #277)
+
+ * Support for platforms without threading. (Issue #289)
+
+ * Expand default-port comparison in ``HTTPConnectionPool.is_same_host``
+ to allow a pool with no specified port to be considered equal to an
+ HTTP/HTTPS url with port 80/443 explicitly provided. (Issue #305)
+
+ * Improved default SSL/TLS settings to avoid vulnerabilities.
+ (Issue #309)
+
+ * Fixed ``urllib3.poolmanager.ProxyManager`` not retrying on connect errors.
+ (Issue #310)
+
+ * Disable Nagle's Algorithm on the socket for non-proxies. A subset of requests
+ will send the entire HTTP request ~200 milliseconds faster; however, some of
+ the resulting TCP packets will be smaller. (Issue #254)
+
+ * Increased maximum number of SubjectAltNames in ``urllib3.contrib.pyopenssl``
+ from the default 64 to 1024 in a single certificate. (Issue #318)
+
+ * Headers are now passed and stored as a custom
+ ``urllib3._collections.HTTPHeaderDict`` object rather than a plain ``dict``.
+ (Issue #329, #333)
+
+ * Headers no longer lose their case on Python 3. (Issue #236)
+
+ * ``urllib3.contrib.pyopenssl`` now uses the operating system's default CA
+ certificates on inject. (Issue #332)
+
+ * Requests with ``retries=False`` will immediately raise any exceptions without
+ wrapping them in ``MaxRetryError``. (Issue #348)
+
+ * Fixed open socket leak with SSL-related failures. (Issue #344, #348)
+
+
1.7.1 (2013-09-25)
++++++++++++++++++
- * Added granular timeout support with new `urllib3.util.Timeout` class.
+ * Added granular timeout support with new ``urllib3.util.Timeout`` class.
(Issue #231)
* Fixed Python 3.4 support. (Issue #238)
diff --git a/urllib3.egg-info/SOURCES.txt b/urllib3.egg-info/SOURCES.txt
index 32759d9..a5170fb 100644
--- a/urllib3.egg-info/SOURCES.txt
+++ b/urllib3.egg-info/SOURCES.txt
@@ -11,9 +11,8 @@ dummyserver/handlers.py
dummyserver/proxy.py
dummyserver/server.py
dummyserver/testcase.py
-test/__init__.py
-test/benchmark.py
test/test_collections.py
+test/test_compatibility.py
test/test_connectionpool.py
test/test_exceptions.py
test/test_fields.py
@@ -24,6 +23,7 @@ test/test_response.py
test/test_util.py
urllib3/__init__.py
urllib3/_collections.py
+urllib3/connection.py
urllib3/connectionpool.py
urllib3/exceptions.py
urllib3/fields.py
@@ -42,4 +42,5 @@ urllib3/contrib/pyopenssl.py
urllib3/packages/__init__.py
urllib3/packages/ordered_dict.py
urllib3/packages/six.py
-urllib3/packages/ssl_match_hostname/__init__.py
\ No newline at end of file
+urllib3/packages/ssl_match_hostname/__init__.py
+urllib3/packages/ssl_match_hostname/_implementation.py
\ No newline at end of file
diff --git a/urllib3/__init__.py b/urllib3/__init__.py
index eed7006..086387f 100644
--- a/urllib3/__init__.py
+++ b/urllib3/__init__.py
@@ -10,7 +10,7 @@ urllib3 - Thread-safe connection pooling and re-using.
__author__ = 'Andrey Petrov (andrey.petrov@shazow.net)'
__license__ = 'MIT'
-__version__ = '1.7.1'
+__version__ = '1.8'
from .connectionpool import (
diff --git a/urllib3/_collections.py b/urllib3/_collections.py
index 282b8d5..9cea3a4 100644
--- a/urllib3/_collections.py
+++ b/urllib3/_collections.py
@@ -4,16 +4,26 @@
# This module is part of urllib3 and is released under
# the MIT License: http://www.opensource.org/licenses/mit-license.php
-from collections import MutableMapping
-from threading import RLock
+from collections import Mapping, MutableMapping
+try:
+ from threading import RLock
+except ImportError: # Platform-specific: No threads available
+ class RLock:
+ def __enter__(self):
+ pass
+
+ def __exit__(self, exc_type, exc_value, traceback):
+ pass
+
try: # Python 2.7+
from collections import OrderedDict
except ImportError:
from .packages.ordered_dict import OrderedDict
+from .packages.six import itervalues
-__all__ = ['RecentlyUsedContainer']
+__all__ = ['RecentlyUsedContainer', 'HTTPHeaderDict']
_Null = object()
@@ -92,3 +102,104 @@ class RecentlyUsedContainer(MutableMapping):
def keys(self):
with self.lock:
return self._container.keys()
+
+
+class HTTPHeaderDict(MutableMapping):
+ """
+ :param headers:
+ An iterable of field-value pairs. Must not contain multiple field names
+ when compared case-insensitively.
+
+ :param kwargs:
+ Additional field-value pairs to pass in to ``dict.update``.
+
+ A ``dict`` like container for storing HTTP Headers.
+
+ Field names are stored and compared case-insensitively in compliance with
+ RFC 2616. Iteration provides the first case-sensitive key seen for each
+ case-insensitive pair.
+
+ Using ``__setitem__`` syntax overwrites fields that compare equal
+ case-insensitively in order to maintain ``dict``'s api. For fields that
+ compare equal, instead create a new ``HTTPHeaderDict`` and use ``.add``
+ in a loop.
+
+ If multiple fields that are equal case-insensitively are passed to the
+ constructor or ``.update``, the behavior is undefined and some will be
+ lost.
+
+ >>> headers = HTTPHeaderDict()
+ >>> headers.add('Set-Cookie', 'foo=bar')
+ >>> headers.add('set-cookie', 'baz=quxx')
+ >>> headers['content-length'] = '7'
+ >>> headers['SET-cookie']
+ 'foo=bar, baz=quxx'
+ >>> headers['Content-Length']
+ '7'
+
+ If you want to access the raw headers with their original casing
+ for debugging purposes you can access the private ``._data`` attribute
+ which is a normal python ``dict`` that maps the case-insensitive key to a
+ list of tuples stored as (case-sensitive-original-name, value). Using the
+ structure from above as our example:
+
+ >>> headers._data
+ {'set-cookie': [('Set-Cookie', 'foo=bar'), ('set-cookie', 'baz=quxx')],
+ 'content-length': [('content-length', '7')]}
+ """
+
+ def __init__(self, headers=None, **kwargs):
+ self._data = {}
+ if headers is None:
+ headers = {}
+ self.update(headers, **kwargs)
+
+ def add(self, key, value):
+ """Adds a (name, value) pair, doesn't overwrite the value if it already
+ exists.
+
+ >>> headers = HTTPHeaderDict(foo='bar')
+ >>> headers.add('Foo', 'baz')
+ >>> headers['foo']
+ 'bar, baz'
+ """
+ self._data.setdefault(key.lower(), []).append((key, value))
+
+ def getlist(self, key):
+ """Returns a list of all the values for the named field. Returns an
+ empty list if the key doesn't exist."""
+ return self[key].split(', ') if key in self else []
+
+ def copy(self):
+ h = HTTPHeaderDict()
+ for key in self._data:
+ for rawkey, value in self._data[key]:
+ h.add(rawkey, value)
+ return h
+
+ def __eq__(self, other):
+ if not isinstance(other, Mapping):
+ return False
+ other = HTTPHeaderDict(other)
+ return dict((k1, self[k1]) for k1 in self._data) == \
+ dict((k2, other[k2]) for k2 in other._data)
+
+ def __getitem__(self, key):
+ values = self._data[key.lower()]
+ return ', '.join(value[1] for value in values)
+
+ def __setitem__(self, key, value):
+ self._data[key.lower()] = [(key, value)]
+
+ def __delitem__(self, key):
+ del self._data[key.lower()]
+
+ def __len__(self):
+ return len(self._data)
+
+ def __iter__(self):
+ for headers in itervalues(self._data):
+ yield headers[0][0]
+
+ def __repr__(self):
+ return '%s(%r)' % (self.__class__.__name__, dict(self.items()))
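A compact usage sketch of the class defined above, restating its doctest as
plain code:

    from urllib3._collections import HTTPHeaderDict

    headers = HTTPHeaderDict()
    headers.add('Set-Cookie', 'foo=bar')
    headers.add('set-cookie', 'baz=quxx')

    # Values of case-insensitively equal fields are joined on access.
    assert headers['SET-COOKIE'] == 'foo=bar, baz=quxx'
    assert headers.getlist('set-cookie') == ['foo=bar', 'baz=quxx']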
diff --git a/urllib3/connection.py b/urllib3/connection.py
new file mode 100644
index 0000000..662bd2e
--- /dev/null
+++ b/urllib3/connection.py
@@ -0,0 +1,195 @@
+# urllib3/connection.py
+# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
+#
+# This module is part of urllib3 and is released under
+# the MIT License: http://www.opensource.org/licenses/mit-license.php
+
+import sys
+import socket
+from socket import timeout as SocketTimeout
+
+try: # Python 3
+ from http.client import HTTPConnection as _HTTPConnection, HTTPException
+except ImportError:
+ from httplib import HTTPConnection as _HTTPConnection, HTTPException
+
+class DummyConnection(object):
+ "Used to detect a failed ConnectionCls import."
+ pass
+
+try: # Compiled with SSL?
+ ssl = None
+ HTTPSConnection = DummyConnection
+
+ class BaseSSLError(BaseException):
+ pass
+
+ try: # Python 3
+ from http.client import HTTPSConnection as _HTTPSConnection
+ except ImportError:
+ from httplib import HTTPSConnection as _HTTPSConnection
+
+ import ssl
+ BaseSSLError = ssl.SSLError
+
+except (ImportError, AttributeError): # Platform-specific: No SSL.
+ pass
+
+from .exceptions import (
+ ConnectTimeoutError,
+)
+from .packages.ssl_match_hostname import match_hostname
+from .packages import six
+from .util import (
+ assert_fingerprint,
+ resolve_cert_reqs,
+ resolve_ssl_version,
+ ssl_wrap_socket,
+)
+
+
+port_by_scheme = {
+ 'http': 80,
+ 'https': 443,
+}
+
+
+class HTTPConnection(_HTTPConnection, object):
+ """
+ Based on httplib.HTTPConnection but provides an extra constructor
+ backwards-compatibility layer between older and newer Pythons.
+ """
+
+ default_port = port_by_scheme['http']
+
+ # By default, disable Nagle's Algorithm.
+ tcp_nodelay = 1
+
+ def __init__(self, *args, **kw):
+ if six.PY3: # Python 3
+ kw.pop('strict', None)
+
+ if sys.version_info < (2, 7): # Python 2.6 and earlier
+ kw.pop('source_address', None)
+ self.source_address = None
+
+ _HTTPConnection.__init__(self, *args, **kw)
+
+ def _new_conn(self):
+ """ Establish a socket connection and set nodelay settings on it
+
+ :return: a new socket connection
+ """
+ extra_args = []
+ if self.source_address: # Python 2.7+
+ extra_args.append(self.source_address)
+
+ conn = socket.create_connection(
+ (self.host, self.port),
+ self.timeout,
+ *extra_args
+ )
+ conn.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY,
+ self.tcp_nodelay)
+ return conn
+
+ def _prepare_conn(self, conn):
+ self.sock = conn
+ if self._tunnel_host:
+ # TODO: Fix tunnel so it doesn't depend on self.sock state.
+ self._tunnel()
+
+ def connect(self):
+ conn = self._new_conn()
+ self._prepare_conn(conn)
+
+
+class HTTPSConnection(HTTPConnection):
+ default_port = port_by_scheme['https']
+
+ def __init__(self, host, port=None, key_file=None, cert_file=None,
+ strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
+ source_address=None):
+
+ HTTPConnection.__init__(self, host, port,
+ strict=strict,
+ timeout=timeout,
+ source_address=source_address)
+
+ self.key_file = key_file
+ self.cert_file = cert_file
+
+ def connect(self):
+ conn = self._new_conn()
+ self._prepare_conn(conn)
+ self.sock = ssl.wrap_socket(conn, self.key_file, self.cert_file)
+
+
+class VerifiedHTTPSConnection(HTTPSConnection):
+ """
+ Based on httplib.HTTPSConnection but wraps the socket with
+ SSL certification.
+ """
+ cert_reqs = None
+ ca_certs = None
+ ssl_version = None
+
+ def set_cert(self, key_file=None, cert_file=None,
+ cert_reqs=None, ca_certs=None,
+ assert_hostname=None, assert_fingerprint=None):
+
+ self.key_file = key_file
+ self.cert_file = cert_file
+ self.cert_reqs = cert_reqs
+ self.ca_certs = ca_certs
+ self.assert_hostname = assert_hostname
+ self.assert_fingerprint = assert_fingerprint
+
+ def connect(self):
+ # Add certificate verification
+ try:
+ sock = socket.create_connection(
+ address=(self.host, self.port),
+ timeout=self.timeout,
+ )
+ except SocketTimeout:
+ raise ConnectTimeoutError(
+ self, "Connection to %s timed out. (connect timeout=%s)" %
+ (self.host, self.timeout))
+
+ sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY,
+ self.tcp_nodelay)
+
+ resolved_cert_reqs = resolve_cert_reqs(self.cert_reqs)
+ resolved_ssl_version = resolve_ssl_version(self.ssl_version)
+
+ # the _tunnel_host attribute was added in python 2.6.3 (via
+ # http://hg.python.org/cpython/rev/0f57b30a152f) so pythons 2.6(0-2) do
+ # not have it.
+ if getattr(self, '_tunnel_host', None):
+ self.sock = sock
+ # Calls self._set_hostport(), so self.host is
+ # self._tunnel_host below.
+ self._tunnel()
+
+ # Wrap socket using verification with the root certs in
+ # trusted_root_certs
+ self.sock = ssl_wrap_socket(sock, self.key_file, self.cert_file,
+ cert_reqs=resolved_cert_reqs,
+ ca_certs=self.ca_certs,
+ server_hostname=self.host,
+ ssl_version=resolved_ssl_version)
+
+ if resolved_cert_reqs != ssl.CERT_NONE:
+ if self.assert_fingerprint:
+ assert_fingerprint(self.sock.getpeercert(binary_form=True),
+ self.assert_fingerprint)
+ elif self.assert_hostname is not False:
+ match_hostname(self.sock.getpeercert(),
+ self.assert_hostname or self.host)
+
+
+if ssl:
+ # Make a copy for testing.
+ UnverifiedHTTPSConnection = HTTPSConnection
+ HTTPSConnection = VerifiedHTTPSConnection
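When SSL is available, ``HTTPSConnection`` above is rebound to
``VerifiedHTTPSConnection``. Direct use looks roughly like this (a sketch;
pools normally manage this, and the CA bundle path is illustrative and
platform-dependent):

    from urllib3.connection import VerifiedHTTPSConnection

    conn = VerifiedHTTPSConnection('example.com', 443)
    conn.set_cert(cert_reqs='CERT_REQUIRED',
                  ca_certs='/etc/ssl/certs/ca-certificates.crt')

    conn.request('GET', '/')  # connect() verifies the certificate
    print(conn.getresponse().status)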
diff --git a/urllib3/connectionpool.py b/urllib3/connectionpool.py
index 691d4e2..6d0dbb1 100644
--- a/urllib3/connectionpool.py
+++ b/urllib3/connectionpool.py
@@ -11,134 +11,48 @@ from socket import error as SocketError, timeout as SocketTimeout
import socket
try: # Python 3
- from http.client import HTTPConnection, HTTPException
- from http.client import HTTP_PORT, HTTPS_PORT
-except ImportError:
- from httplib import HTTPConnection, HTTPException
- from httplib import HTTP_PORT, HTTPS_PORT
-
-try: # Python 3
from queue import LifoQueue, Empty, Full
except ImportError:
from Queue import LifoQueue, Empty, Full
import Queue as _ # Platform-specific: Windows
-try: # Compiled with SSL?
- HTTPSConnection = object
-
- class BaseSSLError(BaseException):
- pass
-
- ssl = None
-
- try: # Python 3
- from http.client import HTTPSConnection
- except ImportError:
- from httplib import HTTPSConnection
-
- import ssl
- BaseSSLError = ssl.SSLError
-
-except (ImportError, AttributeError): # Platform-specific: No SSL.
- pass
-
-
from .exceptions import (
ClosedPoolError,
+ ConnectionError,
ConnectTimeoutError,
EmptyPoolError,
HostChangedError,
MaxRetryError,
SSLError,
+ TimeoutError,
ReadTimeoutError,
ProxyError,
)
-from .packages.ssl_match_hostname import CertificateError, match_hostname
+from .packages.ssl_match_hostname import CertificateError
from .packages import six
+from .connection import (
+ port_by_scheme,
+ DummyConnection,
+ HTTPConnection, HTTPSConnection, VerifiedHTTPSConnection,
+ HTTPException, BaseSSLError,
+)
from .request import RequestMethods
from .response import HTTPResponse
from .util import (
assert_fingerprint,
get_host,
is_connection_dropped,
- resolve_cert_reqs,
- resolve_ssl_version,
- ssl_wrap_socket,
Timeout,
)
+
xrange = six.moves.xrange
log = logging.getLogger(__name__)
_Default = object()
-port_by_scheme = {
- 'http': HTTP_PORT,
- 'https': HTTPS_PORT,
-}
-
-
-## Connection objects (extension of httplib)
-
-class VerifiedHTTPSConnection(HTTPSConnection):
- """
- Based on httplib.HTTPSConnection but wraps the socket with
- SSL certification.
- """
- cert_reqs = None
- ca_certs = None
- ssl_version = None
-
- def set_cert(self, key_file=None, cert_file=None,
- cert_reqs=None, ca_certs=None,
- assert_hostname=None, assert_fingerprint=None):
-
- self.key_file = key_file
- self.cert_file = cert_file
- self.cert_reqs = cert_reqs
- self.ca_certs = ca_certs
- self.assert_hostname = assert_hostname
- self.assert_fingerprint = assert_fingerprint
-
- def connect(self):
- # Add certificate verification
- try:
- sock = socket.create_connection(
- address=(self.host, self.port),
- timeout=self.timeout)
- except SocketTimeout:
- raise ConnectTimeoutError(
- self, "Connection to %s timed out. (connect timeout=%s)" %
- (self.host, self.timeout))
-
- resolved_cert_reqs = resolve_cert_reqs(self.cert_reqs)
- resolved_ssl_version = resolve_ssl_version(self.ssl_version)
-
- if self._tunnel_host:
- self.sock = sock
- # Calls self._set_hostport(), so self.host is
- # self._tunnel_host below.
- self._tunnel()
-
- # Wrap socket using verification with the root certs in
- # trusted_root_certs
- self.sock = ssl_wrap_socket(sock, self.key_file, self.cert_file,
- cert_reqs=resolved_cert_reqs,
- ca_certs=self.ca_certs,
- server_hostname=self.host,
- ssl_version=resolved_ssl_version)
-
- if resolved_cert_reqs != ssl.CERT_NONE:
- if self.assert_fingerprint:
- assert_fingerprint(self.sock.getpeercert(binary_form=True),
- self.assert_fingerprint)
- elif self.assert_hostname is not False:
- match_hostname(self.sock.getpeercert(),
- self.assert_hostname or self.host)
-
-
## Pool objects
class ConnectionPool(object):
@@ -218,6 +132,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
"""
scheme = 'http'
+ ConnectionCls = HTTPConnection
def __init__(self, host, port=None, strict=False,
timeout=Timeout.DEFAULT_TIMEOUT, maxsize=1, block=False,
@@ -250,19 +165,20 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
def _new_conn(self):
"""
- Return a fresh :class:`httplib.HTTPConnection`.
+ Return a fresh :class:`HTTPConnection`.
"""
self.num_connections += 1
log.info("Starting new HTTP connection (%d): %s" %
(self.num_connections, self.host))
- extra_params = {}
- if not six.PY3: # Python 2
- extra_params['strict'] = self.strict
-
- return HTTPConnection(host=self.host, port=self.port,
- timeout=self.timeout.connect_timeout,
- **extra_params)
+ conn = self.ConnectionCls(host=self.host, port=self.port,
+ timeout=self.timeout.connect_timeout,
+ strict=self.strict)
+ if self.proxy is not None:
+ # Enable Nagle's algorithm for proxies, to avoid packet
+ # fragmentation.
+ conn.tcp_nodelay = 0
+ return conn
def _get_conn(self, timeout=None):
"""
@@ -319,8 +235,9 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
pass
except Full:
# This should never happen if self.block == True
- log.warning("HttpConnectionPool is full, discarding connection: %s"
- % self.host)
+ log.warning(
+ "Connection pool is full, discarding connection: %s" %
+ self.host)
# Connection never got put back into the pool, close it.
if conn:
@@ -341,7 +258,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
def _make_request(self, conn, method, url, timeout=_Default,
**httplib_request_kw):
"""
- Perform a request on a given httplib connection object taken from our
+ Perform a request on a given urllib connection object taken from our
pool.
:param conn:
@@ -362,7 +279,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
timeout_obj.start_connect()
conn.timeout = timeout_obj.connect_timeout
# conn.request() calls httplib.*.request, not the method in
- # request.py. It also calls makefile (recv) on the socket
+ # urllib3.request. It also calls makefile (recv) on the socket.
conn.request(method, url, **httplib_request_kw)
except SocketTimeout:
raise ConnectTimeoutError(
@@ -371,11 +288,9 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
# Reset the timeout for the recv() on the socket
read_timeout = timeout_obj.read_timeout
- log.debug("Setting read timeout to %s" % read_timeout)
+
# App Engine doesn't have a sock attr
- if hasattr(conn, 'sock') and \
- read_timeout is not None and \
- read_timeout is not Timeout.DEFAULT_TIMEOUT:
+ if hasattr(conn, 'sock'):
# In Python 3 socket.py will catch EAGAIN and return None when you
# try and read into the file pointer created by http.client, which
# instead raises a BadStatusLine exception. Instead of catching
@@ -385,7 +300,10 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
raise ReadTimeoutError(
self, url,
"Read timed out. (read timeout=%s)" % read_timeout)
- conn.sock.settimeout(read_timeout)
+ if read_timeout is Timeout.DEFAULT_TIMEOUT:
+ conn.sock.settimeout(socket.getdefaulttimeout())
+ else: # None or a value
+ conn.sock.settimeout(read_timeout)
# Receive the response from the server
try:
@@ -397,6 +315,16 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
raise ReadTimeoutError(
self, url, "Read timed out. (read timeout=%s)" % read_timeout)
+ except BaseSSLError as e:
+ # Catch possible read timeouts thrown as SSL errors. If not the
+ # case, rethrow the original. We need to do this because of:
+ # http://bugs.python.org/issue10272
+ if 'timed out' in str(e) or \
+ 'did not complete (read)' in str(e): # Python 2.6
+ raise ReadTimeoutError(self, url, "Read timed out.")
+
+ raise
+
except SocketError as e: # Platform-specific: Python 2
# See the above comment about EAGAIN in Python 3. In Python 2 we
# have to specifically catch it and throw the timeout error
@@ -404,8 +332,8 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
raise ReadTimeoutError(
self, url,
"Read timed out. (read timeout=%s)" % read_timeout)
- raise
+ raise
# AppEngine doesn't have a version attr.
http_version = getattr(conn, '_http_vsn_str', 'HTTP/?')
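
The settimeout() branch above resolves three cases: the DEFAULT_TIMEOUT
sentinel defers to the interpreter-wide default, while None (block forever)
or a number is applied as-is. A standalone sketch of that decision, with
``sentinel`` standing in for ``Timeout.DEFAULT_TIMEOUT`` (hypothetical
helper, not urllib3 code)::

    import socket

    def apply_read_timeout(sock, read_timeout, sentinel):
        if read_timeout is sentinel:
            # Fall back to whatever socket.setdefaulttimeout() configured.
            sock.settimeout(socket.getdefaulttimeout())
        else:
            # None means block forever; a number is a per-read deadline.
            sock.settimeout(read_timeout)
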
@@ -441,9 +369,11 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
# TODO: Add optional support for socket.gethostbyname checking.
scheme, host, port = get_host(url)
+ # Use explicit default port for comparison when none is given
if self.port and not port:
- # Use explicit default port for comparison when none is given.
port = port_by_scheme.get(scheme)
+ elif not self.port and port == port_by_scheme.get(scheme):
+ port = None
return (scheme, host, port) == (self.scheme, self.host, self.port)
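
Both branches normalize toward the pool's own representation, so
``http://example.com`` and ``http://example.com:80`` compare equal either
way around. A sketch of the normalization, assuming 1.8's
``port_by_scheme`` mapping::

    port_by_scheme = {'http': 80, 'https': 443}

    def normalize(scheme, host, port, pool_port):
        # Hypothetical helper mirroring the comparison above.
        if pool_port and not port:
            port = port_by_scheme.get(scheme)
        elif not pool_port and port == port_by_scheme.get(scheme):
            port = None
        return (scheme, host, port)

    # A pool created without a port matches a URL with an explicit :80.
    assert (normalize('http', 'example.com', 80, None) ==
            normalize('http', 'example.com', None, None))
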
@@ -482,10 +412,13 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
:param retries:
Number of retries to allow before raising a MaxRetryError exception.
+ If ``False``, then retries are disabled and any exception is raised
+ immediately.
:param redirect:
If True, automatically handle redirects (status codes 301, 302,
- 303, 307, 308). Each redirect counts as a retry.
+ 303, 307, 308). Each redirect counts as a retry. Disabling retries
+ also disables redirects.
:param assert_same_host:
If ``True``, will make sure that the host of the pool requests is
@@ -519,7 +452,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
if headers is None:
headers = self.headers
- if retries < 0:
+ if retries < 0 and retries is not False:
raise MaxRetryError(self, url)
if release_conn is None:
@@ -531,6 +464,17 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
conn = None
+ # Merge the proxy headers. Only do this in HTTP. We have to copy the
+ # headers dict so we can safely change it without those changes being
+ # reflected in anyone else's copy.
+ if self.scheme == 'http':
+ headers = headers.copy()
+ headers.update(self.proxy_headers)
+
+ # Must keep the exception bound to a separate variable or else Python 3
+ # complains about UnboundLocalError.
+ err = None
+
try:
# Request a connection from the queue
conn = self._get_conn(timeout=pool_timeout)
@@ -558,38 +502,41 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
# ``response.read()``)
except Empty:
- # Timed out by queue
- raise ReadTimeoutError(
- self, url, "Read timed out, no pool connections are available.")
+ # Timed out by queue.
+ raise EmptyPoolError(self, "No pool connections are available.")
- except SocketTimeout:
- # Timed out by socket
- raise ReadTimeoutError(self, url, "Read timed out.")
-
- except BaseSSLError as e:
- # SSL certificate error
- if 'timed out' in str(e) or \
- 'did not complete (read)' in str(e): # Platform-specific: Python 2.6
- raise ReadTimeoutError(self, url, "Read timed out.")
+ except (BaseSSLError, CertificateError) as e:
+ # Release connection unconditionally because there is no way to
+ # close it externally in case of exception.
+ release_conn = True
raise SSLError(e)
- except CertificateError as e:
- # Name mismatch
- raise SSLError(e)
+ except (TimeoutError, HTTPException, SocketError) as e:
+ if conn:
+ # Discard the connection for these exceptions. It will be
+ # replaced during the next _get_conn() call.
+ conn.close()
+ conn = None
- except (HTTPException, SocketError) as e:
- if isinstance(e, SocketError) and self.proxy is not None:
- raise ProxyError('Cannot connect to proxy. '
- 'Socket error: %s.' % e)
+ if not retries:
+ if isinstance(e, TimeoutError):
+ # TimeoutError is exempt from MaxRetryError-wrapping.
+ # FIXME: ... Not sure why. Add a reason here.
+ raise
- # Connection broken, discard. It will be replaced next _get_conn().
- conn = None
- # This is necessary so we can access e below
- err = e
+ # Wrap unexpected exceptions with the most appropriate
+ # module-level exception and re-raise.
+ if isinstance(e, SocketError) and self.proxy:
+ raise ProxyError('Cannot connect to proxy.', e)
+
+ if retries is False:
+ raise ConnectionError('Connection failed.', e)
- if retries == 0:
raise MaxRetryError(self, url, e)
+ # Keep track of the error for the retry warning.
+ err = e
+
finally:
if release_conn:
# Put the connection back to be reused. If the connection is
@@ -599,8 +546,8 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
if not conn:
# Try again
- log.warn("Retrying (%d attempts remain) after connection "
- "broken by '%r': %s" % (retries, err, url))
+ log.warning("Retrying (%d attempts remain) after connection "
+ "broken by '%r': %s" % (retries, err, url))
return self.urlopen(method, url, body, headers, retries - 1,
redirect, assert_same_host,
timeout=timeout, pool_timeout=pool_timeout,
@@ -608,7 +555,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
# Handle redirect?
redirect_location = redirect and response.get_redirect_location()
- if redirect_location:
+ if redirect_location and retries is not False:
if response.status == 303:
method = 'GET'
log.info("Redirecting %s -> %s" % (url, redirect_location))
@@ -626,7 +573,7 @@ class HTTPSConnectionPool(HTTPConnectionPool):
When Python is compiled with the :mod:`ssl` module, then
:class:`.VerifiedHTTPSConnection` is used, which *can* verify certificates,
- instead of :class:`httplib.HTTPSConnection`.
+ instead of :class:`.HTTPSConnection`.
:class:`.VerifiedHTTPSConnection` uses one of ``assert_fingerprint``,
``assert_hostname`` and ``host`` in this order to verify connections.
@@ -639,6 +586,7 @@ class HTTPSConnectionPool(HTTPConnectionPool):
"""
scheme = 'https'
+ ConnectionCls = HTTPSConnection
def __init__(self, host, port=None,
strict=False, timeout=None, maxsize=1,
@@ -658,33 +606,33 @@ class HTTPSConnectionPool(HTTPConnectionPool):
self.assert_hostname = assert_hostname
self.assert_fingerprint = assert_fingerprint
- def _prepare_conn(self, connection):
+ def _prepare_conn(self, conn):
"""
Prepare the ``connection`` for :meth:`urllib3.util.ssl_wrap_socket`
and establish the tunnel if proxy is used.
"""
- if isinstance(connection, VerifiedHTTPSConnection):
- connection.set_cert(key_file=self.key_file,
- cert_file=self.cert_file,
- cert_reqs=self.cert_reqs,
- ca_certs=self.ca_certs,
- assert_hostname=self.assert_hostname,
- assert_fingerprint=self.assert_fingerprint)
- connection.ssl_version = self.ssl_version
+ if isinstance(conn, VerifiedHTTPSConnection):
+ conn.set_cert(key_file=self.key_file,
+ cert_file=self.cert_file,
+ cert_reqs=self.cert_reqs,
+ ca_certs=self.ca_certs,
+ assert_hostname=self.assert_hostname,
+ assert_fingerprint=self.assert_fingerprint)
+ conn.ssl_version = self.ssl_version
if self.proxy is not None:
# Python 2.7+
try:
- set_tunnel = connection.set_tunnel
+ set_tunnel = conn.set_tunnel
except AttributeError: # Platform-specific: Python 2.6
- set_tunnel = connection._set_tunnel
+ set_tunnel = conn._set_tunnel
set_tunnel(self.host, self.port, self.proxy_headers)
# Establish tunnel connection early, because otherwise httplib
# would improperly set Host: header to proxy's IP:port.
- connection.connect()
+ conn.connect()
- return connection
+ return conn
def _new_conn(self):
"""
@@ -694,28 +642,30 @@ class HTTPSConnectionPool(HTTPConnectionPool):
log.info("Starting new HTTPS connection (%d): %s"
% (self.num_connections, self.host))
+ if not self.ConnectionCls or self.ConnectionCls is DummyConnection:
+ # Platform-specific: Python without ssl
+ raise SSLError("Can't connect to HTTPS URL because the SSL "
+ "module is not available.")
+
actual_host = self.host
actual_port = self.port
if self.proxy is not None:
actual_host = self.proxy.host
actual_port = self.proxy.port
- if not ssl: # Platform-specific: Python compiled without +ssl
- if not HTTPSConnection or HTTPSConnection is object:
- raise SSLError("Can't connect to HTTPS URL because the SSL "
- "module is not available.")
- connection_class = HTTPSConnection
- else:
- connection_class = VerifiedHTTPSConnection
-
extra_params = {}
if not six.PY3: # Python 2
extra_params['strict'] = self.strict
- connection = connection_class(host=actual_host, port=actual_port,
- timeout=self.timeout.connect_timeout,
- **extra_params)
- return self._prepare_conn(connection)
+ conn = self.ConnectionCls(host=actual_host, port=actual_port,
+ timeout=self.timeout.connect_timeout,
+ **extra_params)
+ if self.proxy is not None:
+ # Enable Nagle's algorithm for proxies, to avoid packet
+ # fragmentation.
+ conn.tcp_nodelay = 0
+
+ return self._prepare_conn(conn)
def connection_from_url(url, **kw):
diff --git a/urllib3/contrib/pyopenssl.py b/urllib3/contrib/pyopenssl.py
index d43bcd6..7c513f3 100644
--- a/urllib3/contrib/pyopenssl.py
+++ b/urllib3/contrib/pyopenssl.py
@@ -1,4 +1,4 @@
-'''SSL with SNI-support for Python 2.
+'''SSL with SNI_-support for Python 2.
This needs the following packages installed:
@@ -18,17 +18,36 @@ your application begins using ``urllib3``, like this::
Now you can use :mod:`urllib3` as you normally would, and it will support SNI
when the required modules are installed.
+
+Activating this module also has the positive side effect of disabling SSL/TLS
+compression in Python 2 (see `CRIME attack`_).
+
+If you want to configure the default list of supported cipher suites, you can
+set the ``urllib3.contrib.pyopenssl.DEFAULT_SSL_CIPHER_LIST`` variable.
+
+Module Variables
+----------------
+
+:var DEFAULT_SSL_CIPHER_LIST: The list of supported SSL/TLS cipher suites.
+ Default: ``ECDH+AESGCM:DH+AESGCM:ECDH+AES256:DH+AES256:ECDH+AES128:DH+AES:
+ ECDH+3DES:DH+3DES:RSA+AESGCM:RSA+AES:RSA+3DES:!aNULL:!MD5:!DSS``
+
+.. _sni: https://en.wikipedia.org/wiki/Server_Name_Indication
+.. _crime attack: https://en.wikipedia.org/wiki/CRIME_(security_exploit)
+
'''
from ndg.httpsclient.ssl_peer_verification import SUBJ_ALT_NAME_SUPPORT
-from ndg.httpsclient.subj_alt_name import SubjectAltName
+from ndg.httpsclient.subj_alt_name import SubjectAltName as BaseSubjectAltName
import OpenSSL.SSL
from pyasn1.codec.der import decoder as der_decoder
-from socket import _fileobject
+from pyasn1.type import univ, constraint
+from socket import _fileobject, timeout
import ssl
+import select
from cStringIO import StringIO
-from .. import connectionpool
+from .. import connection
from .. import util
__all__ = ['inject_into_urllib3', 'extract_from_urllib3']
@@ -49,26 +68,54 @@ _openssl_verify = {
+ OpenSSL.SSL.VERIFY_FAIL_IF_NO_PEER_CERT,
}
+# A secure default.
+# Sources for more information on TLS ciphers:
+#
+# - https://wiki.mozilla.org/Security/Server_Side_TLS
+# - https://www.ssllabs.com/projects/best-practices/index.html
+# - https://hynek.me/articles/hardening-your-web-servers-ssl-ciphers/
+#
+# The general intent is:
+# - Prefer cipher suites that offer perfect forward secrecy (DHE/ECDHE),
+# - prefer ECDHE over DHE for better performance,
+# - prefer any AES-GCM over any AES-CBC for better performance and security,
+# - use 3DES as fallback which is secure but slow,
+# - disable NULL authentication, MD5 MACs and DSS for security reasons.
+DEFAULT_SSL_CIPHER_LIST = "ECDH+AESGCM:DH+AESGCM:ECDH+AES256:DH+AES256:" + \
+ "ECDH+AES128:DH+AES:ECDH+3DES:DH+3DES:RSA+AESGCM:RSA+AES:RSA+3DES:" + \
+ "!aNULL:!MD5:!DSS"
+
orig_util_HAS_SNI = util.HAS_SNI
-orig_connectionpool_ssl_wrap_socket = connectionpool.ssl_wrap_socket
+orig_connection_ssl_wrap_socket = connection.ssl_wrap_socket
def inject_into_urllib3():
'Monkey-patch urllib3 with PyOpenSSL-backed SSL-support.'
- connectionpool.ssl_wrap_socket = ssl_wrap_socket
+ connection.ssl_wrap_socket = ssl_wrap_socket
util.HAS_SNI = HAS_SNI
def extract_from_urllib3():
'Undo monkey-patching by :func:`inject_into_urllib3`.'
- connectionpool.ssl_wrap_socket = orig_connectionpool_ssl_wrap_socket
+ connection.ssl_wrap_socket = orig_connection_ssl_wrap_socket
util.HAS_SNI = orig_util_HAS_SNI
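
A usage sketch, assuming pyOpenSSL, ndg-httpsclient and pyasn1 are
installed. The cipher list is read when each handshake is set up, so
override it before issuing requests::

    from urllib3.contrib import pyopenssl

    # Optional: narrow the suites before patching (module-level variable).
    pyopenssl.DEFAULT_SSL_CIPHER_LIST = 'ECDH+AESGCM:DH+AESGCM:!aNULL:!MD5'

    pyopenssl.inject_into_urllib3()    # SNI-capable TLS on Python 2
    # ... make requests as usual ...
    pyopenssl.extract_from_urllib3()   # restore the stdlib ssl path
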
### Note: This is a slightly bug-fixed version of same from ndg-httpsclient.
+class SubjectAltName(BaseSubjectAltName):
+ '''ASN.1 implementation for subjectAltNames support'''
+
+ # There is no limit to how many SAN certificates a certificate may have,
+ # however this needs to have some limit so we'll set an arbitrarily high
+ # limit.
+ sizeSpec = univ.SequenceOf.sizeSpec + \
+ constraint.ValueSizeConstraint(1, 1024)
+
+
+### Note: This is a slightly bug-fixed version of same from ndg-httpsclient.
def get_subj_alt_name(peer_cert):
# Search through extensions
dns_name = []
@@ -101,6 +148,13 @@ def get_subj_alt_name(peer_cert):
class fileobject(_fileobject):
+ def _wait_for_sock(self):
+ rd, wd, ed = select.select([self._sock], [], [],
+ self._sock.gettimeout())
+ if not rd:
+ raise timeout()
+
+
def read(self, size=-1):
# Use max, disallow tiny reads in a loop as they are very inefficient.
# We never leave read() with any leftover data from a new recv() call
@@ -118,6 +172,7 @@ class fileobject(_fileobject):
try:
data = self._sock.recv(rbufsize)
except OpenSSL.SSL.WantReadError:
+ self._wait_for_sock()
continue
if not data:
break
@@ -145,6 +200,7 @@ class fileobject(_fileobject):
try:
data = self._sock.recv(left)
except OpenSSL.SSL.WantReadError:
+ self._wait_for_sock()
continue
if not data:
break
@@ -196,6 +252,7 @@ class fileobject(_fileobject):
break
buffers.append(data)
except OpenSSL.SSL.WantReadError:
+ self._wait_for_sock()
continue
break
return "".join(buffers)
@@ -206,6 +263,7 @@ class fileobject(_fileobject):
try:
data = self._sock.recv(self._rbufsize)
except OpenSSL.SSL.WantReadError:
+ self._wait_for_sock()
continue
if not data:
break
@@ -233,7 +291,8 @@ class fileobject(_fileobject):
try:
data = self._sock.recv(self._rbufsize)
except OpenSSL.SSL.WantReadError:
- continue
+ self._wait_for_sock()
+ continue
if not data:
break
left = size - buf_len
@@ -328,6 +387,15 @@ def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None,
ctx.load_verify_locations(ca_certs, None)
except OpenSSL.SSL.Error as e:
raise ssl.SSLError('bad ca_certs: %r' % ca_certs, e)
+ else:
+ ctx.set_default_verify_paths()
+
+ # Disable TLS compression to mitigate CRIME attack (issue #309)
+ OP_NO_COMPRESSION = 0x20000
+ ctx.set_options(OP_NO_COMPRESSION)
+
+ # Set list of supported ciphersuites.
+ ctx.set_cipher_list(DEFAULT_SSL_CIPHER_LIST)
cnx = OpenSSL.SSL.Connection(ctx, sock)
cnx.set_tlsext_host_name(server_hostname)
@@ -336,6 +404,7 @@ def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None,
try:
cnx.do_handshake()
except OpenSSL.SSL.WantReadError:
+ select.select([sock], [], [])
continue
except OpenSSL.SSL.Error as e:
raise ssl.SSLError('bad handshake', e)
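
Each of the hunks above applies the same pattern: a non-blocking PyOpenSSL
socket raises WantReadError when no decrypted data is ready yet, so the
reader select()s on the raw socket and retries instead of spinning. The
pattern in isolation, assuming ``cnx`` is an OpenSSL.SSL.Connection
wrapping ``sock`` (hypothetical helper)::

    import select
    import socket

    import OpenSSL.SSL

    def recv_retrying(cnx, sock, nbytes):
        while True:
            try:
                return cnx.recv(nbytes)
            except OpenSSL.SSL.WantReadError:
                # Wait until the raw socket is readable, honoring the
                # socket timeout; bail out like a blocking recv() would.
                rd, _, _ = select.select([sock], [], [], sock.gettimeout())
                if not rd:
                    raise socket.timeout()
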
diff --git a/urllib3/exceptions.py b/urllib3/exceptions.py
index 98ef9ab..b4df831 100644
--- a/urllib3/exceptions.py
+++ b/urllib3/exceptions.py
@@ -44,6 +44,11 @@ class ProxyError(HTTPError):
pass
+class ConnectionError(HTTPError):
+ "Raised when a normal connection fails."
+ pass
+
+
class DecodeError(HTTPError):
"Raised when automatic decoding based on Content-Type fails."
pass
diff --git a/urllib3/filepost.py b/urllib3/filepost.py
index 4575582..e8b30bd 100644
--- a/urllib3/filepost.py
+++ b/urllib3/filepost.py
@@ -46,16 +46,15 @@ def iter_field_objects(fields):
def iter_fields(fields):
"""
- Iterate over fields.
+ .. deprecated:: 1.6
- .. deprecated ::
+ Iterate over fields.
- The addition of `~urllib3.fields.RequestField` makes this function
- obsolete. Instead, use :func:`iter_field_objects`, which returns
- `~urllib3.fields.RequestField` objects, instead.
+ The addition of :class:`~urllib3.fields.RequestField` makes this function
+ obsolete. Instead, use :func:`iter_field_objects`, which returns
+ :class:`~urllib3.fields.RequestField` objects.
Supports list of (k, v) tuples and dicts.
-
"""
if isinstance(fields, dict):
return ((k, v) for k, v in six.iteritems(fields))
diff --git a/urllib3/packages/ssl_match_hostname/__init__.py b/urllib3/packages/ssl_match_hostname/__init__.py
index 2d61ac2..dd59a75 100644
--- a/urllib3/packages/ssl_match_hostname/__init__.py
+++ b/urllib3/packages/ssl_match_hostname/__init__.py
@@ -1,98 +1,13 @@
-"""The match_hostname() function from Python 3.2, essential when using SSL."""
-
-import re
-
-__version__ = '3.2.2'
-
-class CertificateError(ValueError):
- pass
-
-def _dnsname_match(dn, hostname, max_wildcards=1):
- """Matching according to RFC 6125, section 6.4.3
-
- http://tools.ietf.org/html/rfc6125#section-6.4.3
- """
- pats = []
- if not dn:
- return False
-
- parts = dn.split(r'.')
- leftmost = parts[0]
-
- wildcards = leftmost.count('*')
- if wildcards > max_wildcards:
- # Issue #17980: avoid denials of service by refusing more
- # than one wildcard per fragment. A survery of established
- # policy among SSL implementations showed it to be a
- # reasonable choice.
- raise CertificateError(
- "too many wildcards in certificate DNS name: " + repr(dn))
-
- # speed up common case w/o wildcards
- if not wildcards:
- return dn.lower() == hostname.lower()
-
- # RFC 6125, section 6.4.3, subitem 1.
- # The client SHOULD NOT attempt to match a presented identifier in which
- # the wildcard character comprises a label other than the left-most label.
- if leftmost == '*':
- # When '*' is a fragment by itself, it matches a non-empty dotless
- # fragment.
- pats.append('[^.]+')
- elif leftmost.startswith('xn--') or hostname.startswith('xn--'):
- # RFC 6125, section 6.4.3, subitem 3.
- # The client SHOULD NOT attempt to match a presented identifier
- # where the wildcard character is embedded within an A-label or
- # U-label of an internationalized domain name.
- pats.append(re.escape(leftmost))
- else:
- # Otherwise, '*' matches any dotless string, e.g. www*
- pats.append(re.escape(leftmost).replace(r'\*', '[^.]*'))
-
- # add the remaining fragments, ignore any wildcards
- for frag in parts[1:]:
- pats.append(re.escape(frag))
-
- pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE)
- return pat.match(hostname)
-
-
-def match_hostname(cert, hostname):
- """Verify that *cert* (in decoded format as returned by
- SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 and RFC 6125
- rules are followed, but IP addresses are not accepted for *hostname*.
-
- CertificateError is raised on failure. On success, the function
- returns nothing.
- """
- if not cert:
- raise ValueError("empty or no certificate")
- dnsnames = []
- san = cert.get('subjectAltName', ())
- for key, value in san:
- if key == 'DNS':
- if _dnsname_match(value, hostname):
- return
- dnsnames.append(value)
- if not dnsnames:
- # The subject is only checked when there is no dNSName entry
- # in subjectAltName
- for sub in cert.get('subject', ()):
- for key, value in sub:
- # XXX according to RFC 2818, the most specific Common Name
- # must be used.
- if key == 'commonName':
- if _dnsname_match(value, hostname):
- return
- dnsnames.append(value)
- if len(dnsnames) > 1:
- raise CertificateError("hostname %r "
- "doesn't match either of %s"
- % (hostname, ', '.join(map(repr, dnsnames))))
- elif len(dnsnames) == 1:
- raise CertificateError("hostname %r "
- "doesn't match %r"
- % (hostname, dnsnames[0]))
- else:
- raise CertificateError("no appropriate commonName or "
- "subjectAltName fields were found")
+try:
+ # Python 3.2+
+ from ssl import CertificateError, match_hostname
+except ImportError:
+ try:
+ # Backport of the function from a pypi module
+ from backports.ssl_match_hostname import CertificateError, match_hostname
+ except ImportError:
+ # Our vendored copy
+ from ._implementation import CertificateError, match_hostname
+
+# Not needed, but documenting what we provide.
+__all__ = ('CertificateError', 'match_hostname')
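
Whichever implementation wins the import race, the call signature is the
same. A quick sketch with a hand-built certificate dict in the
``SSLSocket.getpeercert()`` format::

    from urllib3.packages.ssl_match_hostname import (
        CertificateError, match_hostname)

    cert = {'subjectAltName': (('DNS', 'example.com'),
                               ('DNS', '*.example.org'))}

    match_hostname(cert, 'example.com')      # returns None on success
    match_hostname(cert, 'www.example.org')  # wildcard label matches

    try:
        match_hostname(cert, 'evil.test')
    except CertificateError as e:
        print('rejected:', e)
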
diff --git a/urllib3/packages/ssl_match_hostname/_implementation.py b/urllib3/packages/ssl_match_hostname/_implementation.py
new file mode 100644
index 0000000..52f4287
--- /dev/null
+++ b/urllib3/packages/ssl_match_hostname/_implementation.py
@@ -0,0 +1,105 @@
+"""The match_hostname() function from Python 3.3.3, essential when using SSL."""
+
+# Note: This file is under the PSF license as the code comes from the python
+# stdlib. http://docs.python.org/3/license.html
+
+import re
+
+__version__ = '3.4.0.2'
+
+class CertificateError(ValueError):
+ pass
+
+
+def _dnsname_match(dn, hostname, max_wildcards=1):
+ """Matching according to RFC 6125, section 6.4.3
+
+ http://tools.ietf.org/html/rfc6125#section-6.4.3
+ """
+ pats = []
+ if not dn:
+ return False
+
+ # Ported from python3-syntax:
+ # leftmost, *remainder = dn.split(r'.')
+ parts = dn.split(r'.')
+ leftmost = parts[0]
+ remainder = parts[1:]
+
+ wildcards = leftmost.count('*')
+ if wildcards > max_wildcards:
+ # Issue #17980: avoid denials of service by refusing more
+ # than one wildcard per fragment. A survey of established
+ # policy among SSL implementations showed it to be a
+ # reasonable choice.
+ raise CertificateError(
+ "too many wildcards in certificate DNS name: " + repr(dn))
+
+ # speed up common case w/o wildcards
+ if not wildcards:
+ return dn.lower() == hostname.lower()
+
+ # RFC 6125, section 6.4.3, subitem 1.
+ # The client SHOULD NOT attempt to match a presented identifier in which
+ # the wildcard character comprises a label other than the left-most label.
+ if leftmost == '*':
+ # When '*' is a fragment by itself, it matches a non-empty dotless
+ # fragment.
+ pats.append('[^.]+')
+ elif leftmost.startswith('xn--') or hostname.startswith('xn--'):
+ # RFC 6125, section 6.4.3, subitem 3.
+ # The client SHOULD NOT attempt to match a presented identifier
+ # where the wildcard character is embedded within an A-label or
+ # U-label of an internationalized domain name.
+ pats.append(re.escape(leftmost))
+ else:
+ # Otherwise, '*' matches any dotless string, e.g. www*
+ pats.append(re.escape(leftmost).replace(r'\*', '[^.]*'))
+
+ # add the remaining fragments, ignore any wildcards
+ for frag in remainder:
+ pats.append(re.escape(frag))
+
+ pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE)
+ return pat.match(hostname)
+
+
+def match_hostname(cert, hostname):
+ """Verify that *cert* (in decoded format as returned by
+ SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 and RFC 6125
+ rules are followed, but IP addresses are not accepted for *hostname*.
+
+ CertificateError is raised on failure. On success, the function
+ returns nothing.
+ """
+ if not cert:
+ raise ValueError("empty or no certificate")
+ dnsnames = []
+ san = cert.get('subjectAltName', ())
+ for key, value in san:
+ if key == 'DNS':
+ if _dnsname_match(value, hostname):
+ return
+ dnsnames.append(value)
+ if not dnsnames:
+ # The subject is only checked when there is no dNSName entry
+ # in subjectAltName
+ for sub in cert.get('subject', ()):
+ for key, value in sub:
+ # XXX according to RFC 2818, the most specific Common Name
+ # must be used.
+ if key == 'commonName':
+ if _dnsname_match(value, hostname):
+ return
+ dnsnames.append(value)
+ if len(dnsnames) > 1:
+ raise CertificateError("hostname %r "
+ "doesn't match either of %s"
+ % (hostname, ', '.join(map(repr, dnsnames))))
+ elif len(dnsnames) == 1:
+ raise CertificateError("hostname %r "
+ "doesn't match %r"
+ % (hostname, dnsnames[0]))
+ else:
+ raise CertificateError("no appropriate commonName or "
+ "subjectAltName fields were found")
diff --git a/urllib3/poolmanager.py b/urllib3/poolmanager.py
index e7f8667..f18ff2b 100644
--- a/urllib3/poolmanager.py
+++ b/urllib3/poolmanager.py
@@ -1,5 +1,5 @@
# urllib3/poolmanager.py
-# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
+# Copyright 2008-2014 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
#
# This module is part of urllib3 and is released under
# the MIT License: http://www.opensource.org/licenses/mit-license.php
@@ -176,7 +176,7 @@ class ProxyManager(PoolManager):
Behaves just like :class:`PoolManager`, but sends all requests through
the defined proxy, using the CONNECT method for HTTPS URLs.
- :param poxy_url:
+ :param proxy_url:
The URL of the proxy to be used.
:param proxy_headers:
@@ -245,12 +245,11 @@ class ProxyManager(PoolManager):
u = parse_url(url)
if u.scheme == "http":
- # It's too late to set proxy headers on per-request basis for
- # tunnelled HTTPS connections, should use
- # constructor's proxy_headers instead.
+ # For proxied HTTPS requests, httplib sets the necessary headers
+ # on the CONNECT to the proxy. For HTTP, we'll definitely
+ # need to set 'Host' at the very least.
kw['headers'] = self._set_proxy_headers(url, kw.get('headers',
self.headers))
- kw['headers'].update(self.proxy_headers)
return super(ProxyManager, self).urlopen(method, url, redirect, **kw)
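
With the per-request header merge moved into HTTPConnectionPool.urlopen
(see the connectionpool.py hunk above), ProxyManager only has to fix up the
Host header here. A usage sketch with a hypothetical proxy address::

    import urllib3

    proxy = urllib3.ProxyManager('http://localhost:3128/')

    # Plain HTTP requests go through the proxy with proxy_headers merged;
    # HTTPS URLs are tunnelled via CONNECT instead.
    r = proxy.request('GET', 'http://example.com/')
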
diff --git a/urllib3/request.py b/urllib3/request.py
index 66a9a0e..2a92cc2 100644
--- a/urllib3/request.py
+++ b/urllib3/request.py
@@ -45,7 +45,6 @@ class RequestMethods(object):
"""
_encode_url_methods = set(['DELETE', 'GET', 'HEAD', 'OPTIONS'])
- _encode_body_methods = set(['PATCH', 'POST', 'PUT', 'TRACE'])
def __init__(self, headers=None):
self.headers = headers or {}
diff --git a/urllib3/response.py b/urllib3/response.py
index 4efff5a..db44182 100644
--- a/urllib3/response.py
+++ b/urllib3/response.py
@@ -9,6 +9,7 @@ import logging
import zlib
import io
+from ._collections import HTTPHeaderDict
from .exceptions import DecodeError
from .packages.six import string_types as basestring, binary_type
from .util import is_fp_closed
@@ -79,7 +80,10 @@ class HTTPResponse(io.IOBase):
def __init__(self, body='', headers=None, status=0, version=0, reason=None,
strict=0, preload_content=True, decode_content=True,
original_response=None, pool=None, connection=None):
- self.headers = headers or {}
+
+ self.headers = HTTPHeaderDict()
+ if headers:
+ self.headers.update(headers)
self.status = status
self.version = version
self.reason = reason
@@ -90,6 +94,7 @@ class HTTPResponse(io.IOBase):
self._body = body if body and isinstance(body, basestring) else None
self._fp = None
self._original_response = original_response
+ self._fp_bytes_read = 0
self._pool = pool
self._connection = connection
@@ -129,6 +134,14 @@ class HTTPResponse(io.IOBase):
if self._fp:
return self.read(cache_content=True)
+ def tell(self):
+ """
+ Obtain the number of bytes pulled over the wire so far. May differ from
+ the amount of content returned by :meth:`HTTPResponse.read` if bytes
+ are encoded on the wire (e.g., compressed).
+ """
+ return self._fp_bytes_read
+
def read(self, amt=None, decode_content=None, cache_content=False):
"""
Similar to :meth:`httplib.HTTPResponse.read`, but with two additional
@@ -183,6 +196,8 @@ class HTTPResponse(io.IOBase):
self._fp.close()
flush_decoder = True
+ self._fp_bytes_read += len(data)
+
try:
if decode_content and self._decoder:
data = self._decoder.decompress(data)
@@ -238,17 +253,9 @@ class HTTPResponse(io.IOBase):
with ``original_response=r``.
"""
- # Normalize headers between different versions of Python
- headers = {}
+ headers = HTTPHeaderDict()
for k, v in r.getheaders():
- # Python 3: Header keys are returned capitalised
- k = k.lower()
-
- has_value = headers.get(k)
- if has_value: # Python 3: Repeating header keys are unmerged.
- v = ', '.join([has_value, v])
-
- headers[k] = v
+ headers.add(k, v)
# HTTPResponse objects in Python 3 don't have a .strict attribute
strict = getattr(r, 'strict', 0)
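
The new HTTPHeaderDict keeps lookups case-insensitive on every Python
version and joins repeated keys instead of dropping them. A sketch of the
behaviour ``from_httplib`` now relies on, plus the new tell() accounting,
assuming 1.8 semantics::

    from urllib3._collections import HTTPHeaderDict
    from urllib3.response import HTTPResponse

    headers = HTTPHeaderDict()
    headers.add('Set-Cookie', 'a=1')
    headers.add('set-cookie', 'b=2')   # merged, not overwritten

    r = HTTPResponse(body='', headers=headers, status=200)
    assert r.headers['SET-COOKIE'] == 'a=1, b=2'
    assert r.tell() == 0   # no bytes pulled over the wire yet
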
diff --git a/urllib3/util.py b/urllib3/util.py
index 266c9ed..bd26631 100644
--- a/urllib3/util.py
+++ b/urllib3/util.py
@@ -80,14 +80,13 @@ class Timeout(object):
:type read: integer, float, or None
:param total:
- The maximum amount of time to wait for an HTTP request to connect and
- return. This combines the connect and read timeouts into one. In the
+ This combines the connect and read timeouts into one; the read timeout
+ will be set to the time left over from the connect attempt. In the
event that both a connect timeout and a total are specified, or a read
timeout and a total are specified, the shorter timeout will be applied.
Defaults to None.
-
:type total: integer, float, or None
.. note::
@@ -101,18 +100,23 @@ class Timeout(object):
`total`.
In addition, the read and total timeouts only measure the time between
- read operations on the socket connecting the client and the server, not
- the total amount of time for the request to return a complete response.
- As an example, you may want a request to return within 7 seconds or
- fail, so you set the ``total`` timeout to 7 seconds. If the server
- sends one byte to you every 5 seconds, the request will **not** trigger
- time out. This case is admittedly rare.
+ read operations on the socket connecting the client and the server,
+ not the total amount of time for the request to return a complete
+ response. For most requests, the timeout is raised because the server
+ has not sent the first byte in the specified time. This is not always
+ the case; if a server streams one byte every fifteen seconds, a timeout
+ of 20 seconds will never trigger, even though the request will
+ take several minutes to complete.
+
+ If your goal is to cut off any request after a set amount of wall clock
+ time, consider having a second "watcher" thread to cut off a slow
+ request.
"""
#: A sentinel object representing the default timeout value
DEFAULT_TIMEOUT = _GLOBAL_DEFAULT_TIMEOUT
- def __init__(self, connect=_Default, read=_Default, total=None):
+ def __init__(self, total=None, connect=_Default, read=_Default):
self._connect = self._validate_timeout(connect, 'connect')
self._read = self._validate_timeout(read, 'read')
self.total = self._validate_timeout(total, 'total')
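
With ``total`` now the first positional argument, the common "whole request
must finish in N seconds" case reads naturally, and the keyword forms keep
working. A usage sketch::

    import urllib3
    from urllib3.util import Timeout

    # At most 2s to connect, 7s between reads, 10s overall -- whichever
    # budget runs out first wins.
    t = Timeout(total=10, connect=2.0, read=7.0)
    http = urllib3.PoolManager(timeout=t)
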
@@ -372,7 +376,8 @@ def parse_url(url):
# Auth
if '@' in url:
- auth, url = url.split('@', 1)
+ # Last '@' denotes end of auth part
+ auth, url = url.rsplit('@', 1)
# IPv6
if url and url[0] == '[':
@@ -386,10 +391,14 @@ def parse_url(url):
if not host:
host = _host
- if not port.isdigit():
- raise LocationParseError("Failed to parse: %s" % url)
-
- port = int(port)
+ if port:
+ # If given, ports must be integers.
+ if not port.isdigit():
+ raise LocationParseError("Failed to parse: %s" % url)
+ port = int(port)
+ else:
+ # Blank ports are cool, too. (rfc3986#section-3.2.3)
+ port = None
elif not host and url:
host = url
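
The rsplit('@', 1) change means only the last '@' separates auth from host,
and a trailing ':' with no digits now parses as "no port" instead of
raising. For example::

    from urllib3.util import parse_url

    print(parse_url('http://foo@bar@example.com:/path'))
    # Url(scheme='http', auth='foo@bar', host='example.com', port=None,
    #     path='/path', query=None, fragment=None)
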
@@ -417,7 +426,7 @@ def get_host(url):
def make_headers(keep_alive=None, accept_encoding=None, user_agent=None,
- basic_auth=None):
+ basic_auth=None, proxy_basic_auth=None):
"""
Shortcuts for generating request headers.
@@ -438,6 +447,10 @@ def make_headers(keep_alive=None, accept_encoding=None, user_agent=None,
Colon-separated username:password string for 'authorization: basic ...'
auth header.
+ :param proxy_basic_auth:
+ Colon-separated username:password string for 'proxy-authorization: basic ...'
+ auth header.
+
Example: ::
>>> make_headers(keep_alive=True, user_agent="Batman/1.0")
@@ -465,6 +478,10 @@ def make_headers(keep_alive=None, accept_encoding=None, user_agent=None,
headers['authorization'] = 'Basic ' + \
b64encode(six.b(basic_auth)).decode('utf-8')
+ if proxy_basic_auth:
+ headers['proxy-authorization'] = 'Basic ' + \
+ b64encode(six.b(proxy_basic_auth)).decode('utf-8')
+
return headers
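
The new parameter mirrors ``basic_auth`` but targets the
Proxy-Authorization header, so the credentials are consumed by the proxy
rather than forwarded to the origin. For example::

    from urllib3.util import make_headers

    print(make_headers(keep_alive=True, proxy_basic_auth='user:passwd'))
    # {'connection': 'keep-alive',
    #  'proxy-authorization': 'Basic dXNlcjpwYXNzd2Q='}
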
@@ -603,6 +620,11 @@ if SSLContext is not None: # Python 3.2+
"""
context = SSLContext(ssl_version)
context.verify_mode = cert_reqs
+
+ # Disable TLS compression to mitigate CRIME attack (issue #309)
+ OP_NO_COMPRESSION = 0x20000
+ context.options |= OP_NO_COMPRESSION
+
if ca_certs:
try:
context.load_verify_locations(ca_certs)