Imported Upstream version 1.5

author: SVN-Git Migration <python-modules-team@lists.alioth.debian.org> 2015-10-08 13:19:32 -0700
committer: SVN-Git Migration <python-modules-team@lists.alioth.debian.org> 2015-10-08 13:19:32 -0700
commit: e5b66555b54a9854b340975471e8cdfa64e311f7 (patch)
tree: 21c6529cbca37472e7d77b1b55014a9f6f27687b /urllib3
parent: 77245469d4fbd400c6702cde35f9d9002540663e (diff)
download: python-urllib3-e5b66555b54a9854b340975471e8cdfa64e311f7.tar
python-urllib3-e5b66555b54a9854b340975471e8cdfa64e311f7.tar.gz
12 files changed, 632 insertions, 370 deletions
diff --git a/urllib3/__init__.py b/urllib3/__init__.py
index 2d6fece..b552543 100644
--- a/urllib3/__init__.py
+++ b/urllib3/__init__.py
@@ -10,7 +10,7 @@ urllib3 - Thread-safe connection pooling and re-using.
 
 __author__ = 'Andrey Petrov (andrey.petrov@shazow.net)'
 __license__ = 'MIT'
-__version__ = '1.3'
+__version__ = '1.5'
 
 
 from .connectionpool import (
@@ -28,7 +28,7 @@ from .util import make_headers, get_host
 
 # Set default logging handler to avoid "No handler found" warnings.
 import logging
-try:
+try:  # Python 2.7+
     from logging import NullHandler
 except ImportError:
     class NullHandler(logging.Handler):
@@ -37,6 +37,22 @@ except ImportError:
 
 logging.getLogger(__name__).addHandler(NullHandler())
 
+def add_stderr_logger(level=logging.DEBUG):
+    """
+    Helper for quickly adding a StreamHandler to the logger. Useful for
+    debugging.
+
+    Returns the handler after adding it.
+    """
+    # This method needs to be in this __init__.py to get the __name__ correct
+    # even if urllib3 is vendored within another package.
+    logger = logging.getLogger(__name__)
+    handler = logging.StreamHandler()
+    handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s'))
+    logger.addHandler(handler)
+    logger.setLevel(level)
+    logger.debug('Added an stderr logging handler to logger: %s' % __name__)
+    return handler
+
 # ... Clean up.
-del logging
 del NullHandler
diff --git a/urllib3/_collections.py b/urllib3/_collections.py
index 3cef081..a052b1d 100644
--- a/urllib3/_collections.py
+++ b/urllib3/_collections.py
@@ -4,128 +4,91 @@
 # This module is part of urllib3 and is released under
 # the MIT License: http://www.opensource.org/licenses/mit-license.php
 
-from collections import deque
+from collections import MutableMapping
+from threading import Lock
 
-from threading import RLock
+try: # Python 2.7+
+    from collections import OrderedDict
+except ImportError:
+    from .packages.ordered_dict import OrderedDict
 
-__all__ = ['RecentlyUsedContainer']
 
+__all__ = ['RecentlyUsedContainer']
 
-class AccessEntry(object):
-    __slots__ = ('key', 'is_valid')
 
-    def __init__(self, key, is_valid=True):
-        self.key = key
-        self.is_valid = is_valid
+_Null = object()
 
 
-class RecentlyUsedContainer(dict):
-    """
-    Provides a dict-like that maintains up to ``maxsize`` keys while throwing
-    away the least-recently-used keys beyond ``maxsize``.
+class RecentlyUsedContainer(MutableMapping):
     """
+    Provides a thread-safe dict-like container which maintains up to
+    ``maxsize`` keys while throwing away the least-recently-used keys beyond
+    ``maxsize``.
 
-    # If len(self.access_log) exceeds self._maxsize * CLEANUP_FACTOR, then we
-    # will attempt to cleanup the invalidated entries in the access_log
-    # datastructure during the next 'get' operation.
-    CLEANUP_FACTOR = 10
-
-    def __init__(self, maxsize=10):
-        self._maxsize = maxsize
-
-        self._container = {}
-
-        # We use a deque to to store our keys ordered by the last access.
-        self.access_log = deque()
-        self.access_log_lock = RLock()
-
-        # We look up the access log entry by the key to invalidate it so we can
-        # insert a new authorative entry at the head without having to dig and
-        # find the old entry for removal immediately.
-        self.access_lookup = {}
-
-        # Trigger a heap cleanup when we get past this size
-        self.access_log_limit = maxsize * self.CLEANUP_FACTOR
-
-    def _invalidate_entry(self, key):
-        "If exists: Invalidate old entry and return it."
-        old_entry = self.access_lookup.get(key)
-        if old_entry:
-            old_entry.is_valid = False
+    :param maxsize:
+        Maximum number of recent elements to retain.
 
-        return old_entry
-
-    def _push_entry(self, key):
-        "Push entry onto our access log, invalidate the old entry if exists."
-        self._invalidate_entry(key)
-
-        new_entry = AccessEntry(key)
-        self.access_lookup[key] = new_entry
-
-        self.access_log_lock.acquire()
-        self.access_log.appendleft(new_entry)
-        self.access_log_lock.release()
-
-    def _prune_entries(self, num):
-        "Pop entries from our access log until we popped ``num`` valid ones."
-        while num > 0:
-            self.access_log_lock.acquire()
-            p = self.access_log.pop()
-            self.access_log_lock.release()
+    :param dispose_func:
+        Callback invoked as ``dispose_func(value)`` every time an item is
+        evicted from the container.
+    """
 
-            if not p.is_valid:
-                continue # Invalidated entry, skip
+    ContainerCls = OrderedDict
 
-            dict.pop(self, p.key, None)
-            self.access_lookup.pop(p.key, None)
-            num -= 1
+    def __init__(self, maxsize=10, dispose_func=None):
+        self._maxsize = maxsize
+        self.dispose_func = dispose_func
 
-    def _prune_invalidated_entries(self):
-        "Rebuild our access_log without the invalidated entries."
-        self.access_log_lock.acquire()
-        self.access_log = deque(e for e in self.access_log if e.is_valid)
-        self.access_log_lock.release()
+        self._container = self.ContainerCls()
+        self._lock = Lock()
 
-    def _get_ordered_access_keys(self):
-        "Return ordered access keys for inspection. Used for testing."
-        self.access_log_lock.acquire()
-        r = [e.key for e in self.access_log if e.is_valid]
-        self.access_log_lock.release()
+    def __getitem__(self, key):
+        # Re-insert the item, moving it to the end of the eviction line.
+        with self._lock:
+            item = self._container.pop(key)
+            self._container[key] = item
+            return item
 
-        return r
+    def __setitem__(self, key, value):
+        evicted_value = _Null
+        with self._lock:
+            # Possibly evict the existing value of 'key'
+            evicted_value = self._container.get(key, _Null)
+            self._container[key] = value
 
-    def __getitem__(self, key):
-        item = dict.get(self, key)
+            # If we didn't evict an existing value, we might have to evict the
+            # least recently used item from the beginning of the container.
+            if len(self._container) > self._maxsize:
+                _key, evicted_value = self._container.popitem(last=False)
 
-        if not item:
-            raise KeyError(key)
+        if self.dispose_func and evicted_value is not _Null:
+            self.dispose_func(evicted_value)
 
-        # Insert new entry with new high priority, also implicitly invalidates
-        # the old entry.
-        self._push_entry(key)
+    def __delitem__(self, key):
+        with self._lock:
+            value = self._container.pop(key)
 
-        if len(self.access_log) > self.access_log_limit:
-            # Heap is getting too big, try to clean up any tailing invalidated
-            # entries.
-            self._prune_invalidated_entries()
+        if self.dispose_func:
+            self.dispose_func(value)
 
-        return item
+    def __len__(self):
+        with self._lock:
+            return len(self._container)
 
-    def __setitem__(self, key, item):
-        # Add item to our container and access log
-        dict.__setitem__(self, key, item)
-        self._push_entry(key)
+    def __iter__(self):
+        raise NotImplementedError('Iteration over this class is unlikely to be threadsafe.')
 
-        # Discard invalid and excess entries
-        self._prune_entries(len(self) - self._maxsize)
+    def clear(self):
+        with self._lock:
+            # Copy references to all values, then wipe the mapping.
+            # Under Python 2, this copies the list of values twice :-|
+            values = list(self._container.values())
+            self._container.clear()
 
-    def __delitem__(self, key):
-        self._invalidate_entry(key)
-        self.access_lookup.pop(key, None)
-        dict.__delitem__(self, key)
+        if self.dispose_func:
+            for value in values:
+                self.dispose_func(value)
 
-    def get(self, key, default=None):
-        try:
-            return self[key]
-        except KeyError:
-            return default
+    def keys(self):
+        with self._lock:
+            return self._container.keys()
diff --git a/urllib3/connectionpool.py b/urllib3/connectionpool.py
index c3cb3b1..97da544 100644
--- a/urllib3/connectionpool.py
+++ b/urllib3/connectionpool.py
@@ -7,27 +7,27 @@
 import logging
 import socket
 
-from socket import error as SocketError, timeout as SocketTimeout
+from socket import timeout as SocketTimeout
 
-try:   # Python 3
+try: # Python 3
     from http.client import HTTPConnection, HTTPException
     from http.client import HTTP_PORT, HTTPS_PORT
 except ImportError:
     from httplib import HTTPConnection, HTTPException
     from httplib import HTTP_PORT, HTTPS_PORT
 
-try:   # Python 3
+try: # Python 3
     from queue import LifoQueue, Empty, Full
 except ImportError:
     from Queue import LifoQueue, Empty, Full
 
 
-try:   # Compiled with SSL?
+try: # Compiled with SSL?
     HTTPSConnection = object
     BaseSSLError = None
     ssl = None
 
-    try:   # Python 3
+    try: # Python 3
         from http.client import HTTPSConnection
     except ImportError:
         from httplib import HTTPSConnection
@@ -35,7 +35,7 @@ try:   # Compiled with SSL?
     import ssl
     BaseSSLError = ssl.SSLError
 
-except (ImportError, AttributeError):
+except (ImportError, AttributeError): # Platform-specific: No SSL.
     pass
 
 
@@ -43,6 +43,7 @@ from .request import RequestMethods
 from .response import HTTPResponse
 from .util import get_host, is_connection_dropped
 from .exceptions import (
+    ClosedPoolError,
     EmptyPoolError,
     HostChangedError,
     MaxRetryError,
@@ -206,10 +207,8 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
         try:
             conn = self.pool.get(block=self.block, timeout=timeout)
 
-            # If this is a persistent connection, check if it got disconnected
-            if conn and is_connection_dropped(conn):
-                log.info("Resetting dropped connection: %s" % self.host)
-                conn.close()
+        except AttributeError: # self.pool is None
+            raise ClosedPoolError(self, "Pool is closed.")
 
         except Empty:
             if self.block:
@@ -218,6 +217,11 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
                                      "connections are allowed.")
             pass  # Oh well, we'll create a new connection then
 
+        # If this is a persistent connection, check if it got disconnected
+        if conn and is_connection_dropped(conn):
+            log.info("Resetting dropped connection: %s" % self.host)
+            conn.close()
+
         return conn or self._new_conn()
 
     def _put_conn(self, conn):
@@ -228,17 +232,26 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
             Connection object for the current host and port as returned by
             :meth:`._new_conn` or :meth:`._get_conn`.
 
-        If the pool is already full, the connection is discarded because we
-        exceeded maxsize. If connections are discarded frequently, then maxsize
-        should be increased.
+        If the pool is already full, the connection is closed and discarded
+        because we exceeded maxsize. If connections are discarded frequently,
+        then maxsize should be increased.
+
+        If the pool is closed, then the connection will be closed and discarded.
         """
         try:
             self.pool.put(conn, block=False)
+            return # Everything is dandy, done.
+        except AttributeError:
+            # self.pool is None.
+            pass
         except Full:
             # This should never happen if self.block == True
             log.warning("HttpConnectionPool is full, discarding connection: %s"
                         % self.host)
 
+        # Connection never got put back into the pool, close it.
+        conn.close()
+
     def _make_request(self, conn, method, url, timeout=_Default,
                       **httplib_request_kw):
         """
@@ -258,21 +271,42 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
         if sock:
             sock.settimeout(timeout)
 
-        httplib_response = conn.getresponse()
-
-        log.debug("\"%s %s %s\" %s %s" %
-                  (method, url,
-                   conn._http_vsn_str, # pylint: disable-msg=W0212
-                   httplib_response.status, httplib_response.length))
+        try: # Python 2.7+, use buffering of HTTP responses
+            httplib_response = conn.getresponse(buffering=True)
+        except TypeError: # Python 2.6 and older
+            httplib_response = conn.getresponse()
 
+        # AppEngine doesn't have a version attr.
+        http_version = getattr(conn, '_http_vsn_str', 'HTTP/?')
+        log.debug("\"%s %s %s\" %s %s" % (method, url, http_version,
+                                          httplib_response.status,
+                                          httplib_response.length))
         return httplib_response
 
+    def close(self):
+        """
+        Close all pooled connections and disable the pool.
+        """
+        # Disable access to the pool
+        old_pool, self.pool = self.pool, None
+
+        try:
+            while True:
+                conn = old_pool.get(block=False)
+                if conn:
+                    conn.close()
+
+        except Empty:
+            pass # Done.
 
     def is_same_host(self, url):
         """
         Check if the given ``url`` is a member of the same host as this
         connection pool.
         """
+        if url.startswith('/'):
+            return True
+
         # TODO: Add optional support for socket.gethostbyname checking.
         scheme, host, port = get_host(url)
 
@@ -280,8 +314,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
             # Use explicit default port for comparison when none is given.
             port = port_by_scheme.get(scheme)
 
-        return (url.startswith('/') or
-                (scheme, host, port) == (self.scheme, self.host, self.port))
+        return (scheme, host, port) == (self.scheme, self.host, self.port)
 
     def urlopen(self, method, url, body=None, headers=None, retries=3,
                 redirect=True, assert_same_host=True, timeout=_Default,
@@ -320,8 +353,8 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
             Number of retries to allow before raising a MaxRetryError exception.
 
         :param redirect:
-            Automatically handle redirects (status codes 301, 302, 303, 307),
-            each redirect counts as a retry.
+            If True, automatically handle redirects (status codes 301, 302,
+            303, 307). Each redirect counts as a retry.
 
         :param assert_same_host:
             If ``True``, will make sure that the host of the pool requests is
@@ -374,7 +407,6 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
 
         try:
             # Request a connection from the queue
-            # (Could raise SocketError: Bad file descriptor)
             conn = self._get_conn(timeout=pool_timeout)
 
             # Make the request on the httplib connection object
@@ -417,29 +449,38 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
             # Name mismatch
             raise SSLError(e)
 
-        except (HTTPException, SocketError) as e:
+        except HTTPException as e:
             # Connection broken, discard. It will be replaced next _get_conn().
             conn = None
             # This is necessary so we can access e below
             err = e
 
         finally:
-            if conn and release_conn:
-                # Put the connection back to be reused
+            if release_conn:
+                # Put the connection back to be reused. If the connection is
+                # expired then it will be None, which will get replaced with a
+                # fresh connection during _get_conn.
                 self._put_conn(conn)
 
         if not conn:
+            # Try again
             log.warn("Retrying (%d attempts remain) after connection "
                      "broken by '%r': %s" % (retries, err, url))
             return self.urlopen(method, url, body, headers, retries - 1,
-                                redirect, assert_same_host)  # Try again
+                                redirect, assert_same_host,
+                                timeout=timeout, pool_timeout=pool_timeout,
+                                release_conn=release_conn, **response_kw)
 
         # Handle redirect?
         redirect_location = redirect and response.get_redirect_location()
         if redirect_location:
+            if response.status == 303:
+                method = 'GET'
             log.info("Redirecting %s -> %s" % (url, redirect_location))
             return self.urlopen(method, redirect_location, body, headers,
-                                retries - 1, redirect, assert_same_host)
+                                retries - 1, redirect, assert_same_host,
+                                timeout=timeout, pool_timeout=pool_timeout,
+                                release_conn=release_conn, **response_kw)
 
         return response
 
diff --git a/urllib3/contrib/__init__.py b/urllib3/contrib/__init__.py
deleted file mode 100644
index e69de29..0000000
--- a/urllib3/contrib/__init__.py
+++ /dev/null
diff --git a/urllib3/contrib/ntlmpool.py b/urllib3/contrib/ntlmpool.py
deleted file mode 100644
index bb41fd1..0000000
--- a/urllib3/contrib/ntlmpool.py
+++ /dev/null
@@ -1,120 +0,0 @@
-# urllib3/contrib/ntlmpool.py
-# Copyright 2008-2012 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
-#
-# This module is part of urllib3 and is released under
-# the MIT License: http://www.opensource.org/licenses/mit-license.php
-
-"""
-NTLM authenticating pool, contributed by erikcederstran
-
-Issue #10, see: http://code.google.com/p/urllib3/issues/detail?id=10
-"""
-
-try:
-    from http.client import HTTPSConnection
-except ImportError:
-    from httplib import HTTPSConnection
-from logging import getLogger
-from ntlm import ntlm
-
-from urllib3 import HTTPSConnectionPool
-
-
-log = getLogger(__name__)
-
-
-class NTLMConnectionPool(HTTPSConnectionPool):
-    """
-    Implements an NTLM authentication version of an urllib3 connection pool
-    """
-
-    scheme = 'https'
-
-    def __init__(self, user, pw, authurl, *args, **kwargs):
-        """
-        authurl is a random URL on the server that is protected by NTLM.
-        user is the Windows user, probably in the DOMAIN\username format.
-        pw is the password for the user.
-        """
-        super(NTLMConnectionPool, self).__init__(*args, **kwargs)
-        self.authurl = authurl
-        self.rawuser = user
-        user_parts = user.split('\\', 1)
-        self.domain = user_parts[0].upper()
-        self.user = user_parts[1]
-        self.pw = pw
-
-    def _new_conn(self):
-        # Performs the NTLM handshake that secures the connection. The socket
-        # must be kept open while requests are performed.
-        self.num_connections += 1
-        log.debug('Starting NTLM HTTPS connection no. %d: https://%s%s' %
-                  (self.num_connections, self.host, self.authurl))
-
-        headers = {}
-        headers['Connection'] = 'Keep-Alive'
-        req_header = 'Authorization'
-        resp_header = 'www-authenticate'
-
-        conn = HTTPSConnection(host=self.host, port=self.port)
-
-        # Send negotiation message
-        headers[req_header] = (
-            'NTLM %s' % ntlm.create_NTLM_NEGOTIATE_MESSAGE(self.rawuser))
-        log.debug('Request headers: %s' % headers)
-        conn.request('GET', self.authurl, None, headers)
-        res = conn.getresponse()
-        reshdr = dict(res.getheaders())
-        log.debug('Response status: %s %s' % (res.status, res.reason))
-        log.debug('Response headers: %s' % reshdr)
-        log.debug('Response data: %s [...]' % res.read(100))
-
-        # Remove the reference to the socket, so that it can not be closed by
-        # the response object (we want to keep the socket open)
-        res.fp = None
-
-        # Server should respond with a challenge message
-        auth_header_values = reshdr[resp_header].split(', ')
-        auth_header_value = None
-        for s in auth_header_values:
-            if s[:5] == 'NTLM ':
-                auth_header_value = s[5:]
-        if auth_header_value is None:
-            raise Exception('Unexpected %s response header: %s' %
-                            (resp_header, reshdr[resp_header]))
-
-        # Send authentication message
-        ServerChallenge, NegotiateFlags = \
-            ntlm.parse_NTLM_CHALLENGE_MESSAGE(auth_header_value)
-        auth_msg = ntlm.create_NTLM_AUTHENTICATE_MESSAGE(ServerChallenge,
-                                                         self.user,
-                                                         self.domain,
-                                                         self.pw,
-                                                         NegotiateFlags)
-        headers[req_header] = 'NTLM %s' % auth_msg
-        log.debug('Request headers: %s' % headers)
-        conn.request('GET', self.authurl, None, headers)
-        res = conn.getresponse()
-        log.debug('Response status: %s %s' % (res.status, res.reason))
-        log.debug('Response headers: %s' % dict(res.getheaders()))
-        log.debug('Response data: %s [...]' % res.read()[:100])
-        if res.status != 200:
-            if res.status == 401:
-                raise Exception('Server rejected request: wrong '
-                                'username or password')
-            raise Exception('Wrong server response: %s %s' %
-                            (res.status, res.reason))
-
-        res.fp = None
-        log.debug('Connection established')
-        return conn
-
-    def urlopen(self, method, url, body=None, headers=None, retries=3,
-                redirect=True, assert_same_host=True):
-        if headers is None:
-            headers = {}
-        headers['Connection'] = 'Keep-Alive'
-        return super(NTLMConnectionPool, self).urlopen(method, url, body,
-                                                       headers, retries,
-                                                       redirect,
-                                                       assert_same_host)
diff --git a/urllib3/exceptions.py b/urllib3/exceptions.py
index 15c9699..99ebb67 100644
--- a/urllib3/exceptions.py
+++ b/urllib3/exceptions.py
@@ -24,6 +24,11 @@ class SSLError(HTTPError):
     pass
 
 
+class DecodeError(HTTPError):
+    "Raised when automatic decoding based on Content-Type fails."
+    pass
+
+
 ## Leaf Exceptions
 
 class MaxRetryError(PoolError):
@@ -57,6 +62,11 @@ class EmptyPoolError(PoolError):
     pass
 
 
+class ClosedPoolError(PoolError):
+    "Raised when a request enters a pool after the pool has been closed."
+    pass
+
+
 class LocationParseError(ValueError, HTTPError):
     "Raised when get_host or similar fails to parse the URL input."
 
diff --git a/urllib3/filepost.py b/urllib3/filepost.py
index 344a103..e679b93 100644
--- a/urllib3/filepost.py
+++ b/urllib3/filepost.py
@@ -7,11 +7,7 @@
 import codecs
 import mimetypes
 
-try:
-    from mimetools import choose_boundary
-except ImportError:
-    from .packages.mimetools_choose_boundary import choose_boundary
-
+from uuid import uuid4
 from io import BytesIO
 
 from .packages import six
@@ -20,6 +16,13 @@ from .packages.six import b
 writer = codecs.lookup('utf-8')[3]
 
 
+def choose_boundary():
+    """
+    Our embarrassingly-simple replacement for mimetools.choose_boundary.
+    """
+    return uuid4().hex
+
+
 def get_content_type(filename):
     return mimetypes.guess_type(filename)[0] or 'application/octet-stream'
 
diff --git a/urllib3/packages/mimetools_choose_boundary/__init__.py b/urllib3/packages/mimetools_choose_boundary/__init__.py
deleted file mode 100644
index a0109ab..0000000
--- a/urllib3/packages/mimetools_choose_boundary/__init__.py
+++ /dev/null
@@ -1,47 +0,0 @@
-"""The function mimetools.choose_boundary() from Python 2.7, which seems to
-have disappeared in Python 3 (although email.generator._make_boundary() might
-work as a replacement?).
-
-Tweaked to use lock from threading rather than thread.
-"""
-import os
-from threading import Lock
-_counter_lock = Lock()
-
-_counter = 0
-def _get_next_counter():
-    global _counter
-    with _counter_lock:
-        _counter += 1
-        return _counter
-
-_prefix = None
-
-def choose_boundary():
-    """Return a string usable as a multipart boundary.
-
-    The string chosen is unique within a single program run, and
-    incorporates the user id (if available), process id (if available),
-    and current time.  So it's very unlikely the returned string appears
-    in message text, but there's no guarantee.
-
-    The boundary contains dots so you have to quote it in the header."""
-
-    global _prefix
-    import time
-    if _prefix is None:
-        import socket
-        try:
-            hostid = socket.gethostbyname(socket.gethostname())
-        except socket.gaierror:
-            hostid = '127.0.0.1'
-        try:
-            uid = repr(os.getuid())
-        except AttributeError:
-            uid = '1'
-        try:
-            pid = repr(os.getpid())
-        except AttributeError:
-            pid = '1'
-        _prefix = hostid + '.' + uid + '.' + pid
-    return "%s.%.3f.%d" % (_prefix, time.time(), _get_next_counter())
diff --git a/urllib3/packages/ordered_dict.py b/urllib3/packages/ordered_dict.py
new file mode 100644
index 0000000..7f8ee15
--- /dev/null
+++ b/urllib3/packages/ordered_dict.py
@@ -0,0 +1,260 @@
+# Backport of OrderedDict() class that runs on Python 2.4, 2.5, 2.6, 2.7 and pypy.
+# Passes Python2.7's test suite and incorporates all the latest updates.
+# Copyright 2009 Raymond Hettinger, released under the MIT License.
+# http://code.activestate.com/recipes/576693/
+
+try:
+    from thread import get_ident as _get_ident
+except ImportError:
+    from dummy_thread import get_ident as _get_ident
+
+try:
+    from _abcoll import KeysView, ValuesView, ItemsView
+except ImportError:
+    pass
+
+
+class OrderedDict(dict):
+    'Dictionary that remembers insertion order'
+    # An inherited dict maps keys to values.
+    # The inherited dict provides __getitem__, __len__, __contains__, and get.
+    # The remaining methods are order-aware.
+    # Big-O running times for all methods are the same as for regular dictionaries.
+
+    # The internal self.__map dictionary maps keys to links in a doubly linked list.
+    # The circular doubly linked list starts and ends with a sentinel element.
+    # The sentinel element never gets deleted (this simplifies the algorithm).
+    # Each link is stored as a list of length three:  [PREV, NEXT, KEY].
+
+    def __init__(self, *args, **kwds):
+        '''Initialize an ordered dictionary.  Signature is the same as for
+        regular dictionaries, but keyword arguments are not recommended
+        because their insertion order is arbitrary.
+
+        '''
+        if len(args) > 1:
+            raise TypeError('expected at most 1 arguments, got %d' % len(args))
+        try:
+            self.__root
+        except AttributeError:
+            self.__root = root = []                     # sentinel node
+            root[:] = [root, root, None]
+            self.__map = {}
+        self.__update(*args, **kwds)
+
+    def __setitem__(self, key, value, dict_setitem=dict.__setitem__):
+        'od.__setitem__(i, y) <==> od[i]=y'
+        # Setting a new item creates a new link which goes at the end of the linked
+        # list, and the inherited dictionary is updated with the new key/value pair.
+        if key not in self:
+            root = self.__root
+            last = root[0]
+            last[1] = root[0] = self.__map[key] = [last, root, key]
+        dict_setitem(self, key, value)
+
+    def __delitem__(self, key, dict_delitem=dict.__delitem__):
+        'od.__delitem__(y) <==> del od[y]'
+        # Deleting an existing item uses self.__map to find the link which is
+        # then removed by updating the links in the predecessor and successor nodes.
+        dict_delitem(self, key)
+        link_prev, link_next, key = self.__map.pop(key)
+        link_prev[1] = link_next
+        link_next[0] = link_prev
+
+    def __iter__(self):
+        'od.__iter__() <==> iter(od)'
+        root = self.__root
+        curr = root[1]
+        while curr is not root:
+            yield curr[2]
+            curr = curr[1]
+
+    def __reversed__(self):
+        'od.__reversed__() <==> reversed(od)'
+        root = self.__root
+        curr = root[0]
+        while curr is not root:
+            yield curr[2]
+            curr = curr[0]
+
+    def clear(self):
+        'od.clear() -> None.  Remove all items from od.'
+        try:
+            for node in self.__map.itervalues():
+                del node[:]
+            root = self.__root
+            root[:] = [root, root, None]
+            self.__map.clear()
+        except AttributeError:
+            pass
+        dict.clear(self)
+
+    def popitem(self, last=True):
+        '''od.popitem() -> (k, v), return and remove a (key, value) pair.
+        Pairs are returned in LIFO order if last is true or FIFO order if false.
+
+        '''
+        if not self:
+            raise KeyError('dictionary is empty')
+        root = self.__root
+        if last:
+            link = root[0]
+            link_prev = link[0]
+            link_prev[1] = root
+            root[0] = link_prev
+        else:
+            link = root[1]
+            link_next = link[1]
+            root[1] = link_next
+            link_next[0] = root
+        key = link[2]
+        del self.__map[key]
+        value = dict.pop(self, key)
+        return key, value
+
+    # -- the following methods do not depend on the internal structure --
+
+    def keys(self):
+        'od.keys() -> list of keys in od'
+        return list(self)
+
+    def values(self):
+        'od.values() -> list of values in od'
+        return [self[key] for key in self]
+
+    def items(self):
+        'od.items() -> list of (key, value) pairs in od'
+        return [(key, self[key]) for key in self]
+
+    def iterkeys(self):
+        'od.iterkeys() -> an iterator over the keys in od'
+        return iter(self)
+
+    def itervalues(self):
+        'od.itervalues -> an iterator over the values in od'
+        for k in self:
+            yield self[k]
+
+    def iteritems(self):
+        'od.iteritems -> an iterator over the (key, value) items in od'
+        for k in self:
+            yield (k, self[k])
+
+    def update(*args, **kwds):
+        '''od.update(E, **F) -> None.  Update od from dict/iterable E and F.
+
+        If E is a dict instance, does:           for k in E: od[k] = E[k]
+        If E has a .keys() method, does:         for k in E.keys(): od[k] = E[k]
+        Or if E is an iterable of items, does:   for k, v in E: od[k] = v
+        In either case, this is followed by:     for k, v in F.items(): od[k] = v
+
+        '''
+        if len(args) > 2:
+            raise TypeError('update() takes at most 2 positional '
+                            'arguments (%d given)' % (len(args),))
+        elif not args:
+            raise TypeError('update() takes at least 1 argument (0 given)')
+        self = args[0]
+        # Make progressively weaker assumptions about "other"
+        other = ()
+        if len(args) == 2:
+            other = args[1]
+        if isinstance(other, dict):
+            for key in other:
+                self[key] = other[key]
+        elif hasattr(other, 'keys'):
+            for key in other.keys():
+                self[key] = other[key]
+        else:
+            for key, value in other:
+                self[key] = value
+        for key, value in kwds.items():
+            self[key] = value
+
+    __update = update  # let subclasses override update without breaking __init__
+
+    __marker = object()
+
+    def pop(self, key, default=__marker):
+        '''od.pop(k[,d]) -> v, remove specified key and return the corresponding value.
+        If key is not found, d is returned if given, otherwise KeyError is raised.
+
+        '''
+        if key in self:
+            result = self[key]
+            del self[key]
+            return result
+        if default is self.__marker:
+            raise KeyError(key)
+        return default
+
+    def setdefault(self, key, default=None):
+        'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od'
+        if key in self:
+            return self[key]
+        self[key] = default
+        return default
+
+    def __repr__(self, _repr_running={}):
+        'od.__repr__() <==> repr(od)'
+        call_key = id(self), _get_ident()
+        if call_key in _repr_running:
+            return '...'
+        _repr_running[call_key] = 1
+        try:
+            if not self:
+                return '%s()' % (self.__class__.__name__,)
+            return '%s(%r)' % (self.__class__.__name__, self.items())
+        finally:
+            del _repr_running[call_key]
+
+    def __reduce__(self):
+        'Return state information for pickling'
+        items = [[k, self[k]] for k in self]
+        inst_dict = vars(self).copy()
+        for k in vars(OrderedDict()):
+            inst_dict.pop(k, None)
+        if inst_dict:
+            return (self.__class__, (items,), inst_dict)
+        return self.__class__, (items,)
+
+    def copy(self):
+        'od.copy() -> a shallow copy of od'
+        return self.__class__(self)
+
+    @classmethod
+    def fromkeys(cls, iterable, value=None):
+        '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S
+        and values equal to v (which defaults to None).
+
+        '''
+        d = cls()
+        for key in iterable:
+            d[key] = value
+        return d
+
+    def __eq__(self, other):
+        '''od.__eq__(y) <==> od==y.  Comparison to another OD is order-sensitive
+        while comparison to a regular mapping is order-insensitive.
+
+        '''
+        if isinstance(other, OrderedDict):
+            return len(self)==len(other) and self.items() == other.items()
+        return dict.__eq__(self, other)
+
+    def __ne__(self, other):
+        return not self == other
+
+    # -- the following methods are only used in Python 2.7 --
+
+    def viewkeys(self):
+        "od.viewkeys() -> a set-like object providing a view on od's keys"
+        return KeysView(self)
+
+    def viewvalues(self):
+        "od.viewvalues() -> an object providing a view on od's values"
+        return ValuesView(self)
+
+    def viewitems(self):
+        "od.viewitems() -> a set-like object providing a view on od's items"
+        return ItemsView(self)
diff --git a/urllib3/poolmanager.py b/urllib3/poolmanager.py
index 310ea21..8f5b54c 100644
--- a/urllib3/poolmanager.py
+++ b/urllib3/poolmanager.py
@@ -8,9 +8,9 @@ import logging
 
 from ._collections import RecentlyUsedContainer
 from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool
-from .connectionpool import get_host, connection_from_url, port_by_scheme
-from .exceptions import HostChangedError
+from .connectionpool import connection_from_url, port_by_scheme
 from .request import RequestMethods
+from .util import parse_url
 
 
 __all__ = ['PoolManager', 'ProxyManager', 'proxy_from_url']
@@ -48,19 +48,29 @@ class PoolManager(RequestMethods):
 
     """
 
-    # TODO: Make sure there are no memory leaks here.
-
     def __init__(self, num_pools=10, **connection_pool_kw):
         self.connection_pool_kw = connection_pool_kw
-        self.pools = RecentlyUsedContainer(num_pools)
+        self.pools = RecentlyUsedContainer(num_pools,
+                                           dispose_func=lambda p: p.close())
+
+    def clear(self):
+        """
+        Empty our store of pools and direct them all to close.
+
+        This will not affect in-flight connections, but they will not be
+        re-used after completion.
+        """
+        self.pools.clear()
 
-    def connection_from_host(self, host, port=80, scheme='http'):
+    def connection_from_host(self, host, port=None, scheme='http'):
         """
         Get a :class:`ConnectionPool` based on the host, port, and scheme.
 
-        Note that an appropriate ``port`` value is required here to normalize
-        connection pools in our container most effectively.
+        If ``port`` isn't given, it will be derived from the ``scheme`` using
+        ``urllib3.connectionpool.port_by_scheme``.
         """
+        port = port or port_by_scheme.get(scheme, 80)
+
         pool_key = (scheme, host, port)
 
         # If the scheme, host, or port doesn't match existing open connections,
@@ -86,26 +96,36 @@ class PoolManager(RequestMethods):
         Additional parameters are taken from the :class:`.PoolManager`
         constructor.
         """
-        scheme, host, port = get_host(url)
-
-        port = port or port_by_scheme.get(scheme, 80)
-
-        return self.connection_from_host(host, port=port, scheme=scheme)
+        u = parse_url(url)
+        return self.connection_from_host(u.host, port=u.port, scheme=u.scheme)
 
-    def urlopen(self, method, url, **kw):
+    def urlopen(self, method, url, redirect=True, **kw):
         """
-        Same as :meth:`urllib3.connectionpool.HTTPConnectionPool.urlopen`.
+        Same as :meth:`urllib3.connectionpool.HTTPConnectionPool.urlopen`
+        with custom cross-host redirect logic and only sends the request-uri
+        portion of the ``url``.
 
-        ``url`` must be absolute, such that an appropriate
+        The given ``url`` parameter must be absolute, such that an appropriate
         :class:`urllib3.connectionpool.ConnectionPool` can be chosen for it.
         """
-        conn = self.connection_from_url(url)
-        try:
-            return conn.urlopen(method, url, **kw)
+        u = parse_url(url)
+        conn = self.connection_from_host(u.host, port=u.port, scheme=u.scheme)
+
+        kw['assert_same_host'] = False
+        kw['redirect'] = False
+
+        response = conn.urlopen(method, u.request_uri, **kw)
+
+        redirect_location = redirect and response.get_redirect_location()
+        if not redirect_location:
+            return response
+
+        if response.status == 303:
+            method = 'GET'
 
-        except HostChangedError as e:
-            kw['retries'] = e.retries # Persist retries countdown
-            return self.urlopen(method, e.url, **kw)
+        log.info("Redirecting %s -> %s" % (url, redirect_location))
+        kw['retries'] = kw.get('retries', 3) - 1 # Persist retries countdown
+        return self.urlopen(method, redirect_location, **kw)
 
 
 class ProxyManager(RequestMethods):
diff --git a/urllib3/response.py b/urllib3/response.py
index 5fab824..28537d3 100644
--- a/urllib3/response.py
+++ b/urllib3/response.py
@@ -10,7 +10,7 @@ import zlib
 
 from io import BytesIO
 
-from .exceptions import HTTPError
+from .exceptions import DecodeError
 from .packages.six import string_types as basestring
 
 
@@ -148,9 +148,9 @@ class HTTPResponse(object):
             try:
                 if decode_content and decoder:
                     data = decoder(data)
-            except IOError:
-                raise HTTPError("Received response with content-encoding: %s, but "
-                                "failed to decode it." % content_encoding)
+            except (IOError, zlib.error):
+                raise DecodeError("Received response with content-encoding: %s, but "
+                                  "failed to decode it." % content_encoding)
 
             if cache_content:
                 self._body = data
diff --git a/urllib3/util.py b/urllib3/util.py
index 2684a2f..8ec990b 100644
--- a/urllib3/util.py
+++ b/urllib3/util.py
@@ -6,6 +6,8 @@
 
 
 from base64 import b64encode
+from collections import namedtuple
+from socket import error as SocketError
 
 try:
     from select import poll, POLLIN
@@ -20,6 +22,152 @@ from .packages import six
 from .exceptions import LocationParseError
 
 
+class Url(namedtuple('Url', ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment'])):
+    """
+    Datastructure for representing an HTTP URL. Used as a return value for
+    :func:`parse_url`.
+    """
+    slots = ()
+
+    def __new__(cls, scheme=None, auth=None, host=None, port=None, path=None, query=None, fragment=None):
+        return super(Url, cls).__new__(cls, scheme, auth, host, port, path, query, fragment)
+
+    @property
+    def hostname(self):
+        """For backwards-compatibility with urlparse. We're nice like that."""
+        return self.host
+
+    @property
+    def request_uri(self):
+        """Absolute path including the query string."""
+        uri = self.path or '/'
+
+        if self.query is not None:
+            uri += '?' + self.query
+
+        return uri
+
+
+def split_first(s, delims):
+    """
+    Given a string and an iterable of delimiters, split on the first found
+    delimiter. Return two split parts and the matched delimiter.
+
+    If not found, then the first part is the full input string.
+
+    Example: ::
+
+        >>> split_first('foo/bar?baz', '?/=')
+        ('foo', 'bar?baz', '/')
+        >>> split_first('foo/bar?baz', '123')
+        ('foo/bar?baz', '', None)
+
+    Scales linearly with number of delims. Not ideal for large number of delims.
+    """
+    min_idx = None
+    min_delim = None
+    for d in delims:
+        idx = s.find(d)
+        if idx < 0:
+            continue
+
+        if min_idx is None or idx < min_idx:
+            min_idx = idx
+            min_delim = d
+
+    if min_idx is None or min_idx < 0:
+        return s, '', None
+
+    return s[:min_idx], s[min_idx+1:], min_delim
+
+
+def parse_url(url):
+    """
+    Given a url, return a parsed :class:`.Url` namedtuple. Best-effort is
+    performed to parse incomplete urls. Fields not provided will be None.
+
+    Partly backwards-compatible with :mod:`urlparse`.
+
+    Example: ::
+
+        >>> parse_url('http://google.com/mail/')
+        Url(scheme='http', host='google.com', port=None, path='/mail/', ...)
+        >>> parse_url('google.com:80')
+        Url(scheme=None, host='google.com', port=80, path=None, ...)
+        >>> parse_url('/foo?bar')
+        Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...)
+    """
+
+    # While this code has overlap with stdlib's urlparse, it is much
+    # simplified for our needs and less annoying.
+    # Additionally, this implementation does silly things to be optimal
+    # on CPython.
+
+    scheme = None
+    auth = None
+    host = None
+    port = None
+    path = None
+    fragment = None
+    query = None
+
+    # Scheme
+    if '://' in url:
+        scheme, url = url.split('://', 1)
+
+    # Find the earliest Authority Terminator
+    # (http://tools.ietf.org/html/rfc3986#section-3.2)
+    url, path_, delim = split_first(url, ['/', '?', '#'])
+
+    if delim:
+        # Reassemble the path
+        path = delim + path_
+
+    # Auth
+    if '@' in url:
+        auth, url = url.split('@', 1)
+
+    # IPv6
+    if url and url[0] == '[':
+        host, url = url[1:].split(']', 1)
+
+    # Port
+    if ':' in url:
+        _host, port = url.split(':', 1)
+
+        if not host:
+            host = _host
+
+        if not port.isdigit():
+            raise LocationParseError("Failed to parse: %s" % url)
+
+        port = int(port)
+
+    elif not host and url:
+        host = url
+
+    if not path:
+        return Url(scheme, auth, host, port, path, query, fragment)
+
+    # Fragment
+    if '#' in path:
+        path, fragment = path.split('#', 1)
+
+    # Query
+    if '?' in path:
+        path, query = path.split('?', 1)
+
+    return Url(scheme, auth, host, port, path, query, fragment)
+
+
+def get_host(url):
+    """
+    Deprecated. Use :func:`.parse_url` instead.
+    """
+    p = parse_url(url)
+    return p.scheme or 'http', p.hostname, p.port
+
+
 def make_headers(keep_alive=None, accept_encoding=None, user_agent=None,
                  basic_auth=None):
     """
@@ -72,60 +220,28 @@ def make_headers(keep_alive=None, accept_encoding=None, user_agent=None,
     return headers
 
 
-def get_host(url):
-    """
-    Given a url, return its scheme, host and port (None if it's not there).
-
-    For example: ::
-
-        >>> get_host('http://google.com/mail/')
-        ('http', 'google.com', None)
-        >>> get_host('google.com:80')
-        ('http', 'google.com', 80)
-    """
-
-    # This code is actually similar to urlparse.urlsplit, but much
-    # simplified for our needs.
-    port = None
-    scheme = 'http'
-
-    if '://' in url:
-        scheme, url = url.split('://', 1)
-    if '/' in url:
-        url, _path = url.split('/', 1)
-    if '@' in url:
-        _auth, url = url.split('@', 1)
-    if ':' in url:
-        url, port = url.split(':', 1)
-
-        if not port.isdigit():
-            raise LocationParseError("Failed to parse: %s" % url)
-
-        port = int(port)
-
-    return scheme, url, port
-
-
-
 def is_connection_dropped(conn):
     """
     Returns True if the connection is dropped and should be closed.
 
     :param conn:
-        ``HTTPConnection`` object.
+        :class:`httplib.HTTPConnection` object.
 
     Note: For platforms like AppEngine, this will always return ``False`` to
     let the platform handle connection recycling transparently for us.
     """
     sock = getattr(conn, 'sock', False)
-    if not sock: #Platform-specific: AppEngine
+    if not sock: # Platform-specific: AppEngine
         return False
 
     if not poll: # Platform-specific
-        if not select: #Platform-specific: AppEngine
+        if not select: # Platform-specific: AppEngine
             return False
 
-        return select([sock], [], [], 0.0)[0]
+        try:
+            return select([sock], [], [], 0.0)[0]
+        except SocketError:
+            return True
 
     # This version is better on platforms that support it.
     p = poll()
author	SVN-Git Migration <python-modules-team@lists.alioth.debian.org>	2015-10-08 13:19:32 -0700
committer	SVN-Git Migration <python-modules-team@lists.alioth.debian.org>	2015-10-08 13:19:32 -0700
commit	e5b66555b54a9854b340975471e8cdfa64e311f7 (patch)
tree	21c6529cbca37472e7d77b1b55014a9f6f27687b /urllib3
parent	77245469d4fbd400c6702cde35f9d9002540663e (diff)
download	python-urllib3-e5b66555b54a9854b340975471e8cdfa64e311f7.tar python-urllib3-e5b66555b54a9854b340975471e8cdfa64e311f7.tar.gz