path: root/requests/packages/urllib3/connectionpool.py
diff options
Diffstat (limited to 'requests/packages/urllib3/connectionpool.py')
1 files changed, 525 insertions, 0 deletions
diff --git a/requests/packages/urllib3/connectionpool.py b/requests/packages/urllib3/connectionpool.py
new file mode 100644
index 0000000..8b10dc7
--- /dev/null
+++ b/requests/packages/urllib3/connectionpool.py
@@ -0,0 +1,525 @@
+# urllib3/connectionpool.py
+# Copyright 2008-2011 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
+# This module is part of urllib3 and is released under
+# the MIT License: http://www.opensource.org/licenses/mit-license.php
+import base64
+import logging
+import socket
+from httplib import HTTPConnection, HTTPSConnection, HTTPException
+from Queue import Queue, Empty, Full
+from select import select
+from socket import error as SocketError, timeout as SocketTimeout
+try:  # ssl is optional; degrade gracefully when it is not compiled in
+    import ssl
+    BaseSSLError = ssl.SSLError
+except ImportError:
+    ssl = None
+    BaseSSLError = None
+from .request import RequestMethods
+from .response import HTTPResponse
+from .exceptions import (
+    SSLError,
+    MaxRetryError,
+    TimeoutError,
+    HostChangedError,
+    EmptyPoolError,
+)
+# Module-level logger, named after this module.
+log = logging.getLogger(__name__)
+# Sentinel used as the default for ``timeout`` arguments so that an explicit
+# ``None`` (no timeout) can be told apart from "argument not supplied".
+_Default = object()
+## Connection objects (extension of httplib)
+class VerifiedHTTPSConnection(HTTPSConnection):
+    """
+    Based on httplib.HTTPSConnection but wraps the socket with
+    SSL certification.
+
+    Call :meth:`set_cert` before :meth:`connect` to choose the client
+    certificate, the verification mode and the CA bundle.
+    """
+    cert_reqs = None
+    ca_certs = None
+
+    def set_cert(self, key_file=None, cert_file=None,
+                 cert_reqs='CERT_NONE', ca_certs=None):
+        """
+        Store the SSL settings used by the next :meth:`connect` call.
+
+        :param key_file:
+            Path to the client private key, or None.
+        :param cert_file:
+            Path to the client certificate, or None.
+        :param cert_reqs:
+            Name of the verification mode: ``'CERT_NONE'``,
+            ``'CERT_OPTIONAL'`` or ``'CERT_REQUIRED'``. Unknown names fall
+            back to ``ssl.CERT_NONE`` (kept for backward compatibility).
+        :param ca_certs:
+            Path to the CA bundle handed to :func:`ssl.wrap_socket`.
+        """
+        # Map the string names accepted by the pools onto the ssl constants.
+        # With an empty mapping every request silently degraded to CERT_NONE.
+        ssl_req_scheme = {
+            'CERT_NONE': ssl.CERT_NONE,
+            'CERT_OPTIONAL': ssl.CERT_OPTIONAL,
+            'CERT_REQUIRED': ssl.CERT_REQUIRED,
+        }
+        self.key_file = key_file
+        self.cert_file = cert_file
+        self.cert_reqs = ssl_req_scheme.get(cert_reqs) or ssl.CERT_NONE
+        self.ca_certs = ca_certs
+
+    def connect(self):
+        """
+        Open the TCP connection and wrap it in SSL using the settings stored
+        by :meth:`set_cert`.
+        """
+        # Add certificate verification
+        sock = socket.create_connection((self.host, self.port), self.timeout)
+        # Wrap socket using verification with the root certs in ca_certs.
+        # NOTE(review): wrap_socket validates the certificate chain only; it
+        # does not compare the certificate's host name with self.host.
+        try:
+            self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file,
+                                        cert_reqs=self.cert_reqs,
+                                        ca_certs=self.ca_certs)
+        except Exception:
+            # Do not leak the raw socket when the handshake fails.
+            sock.close()
+            raise
+## Pool objects
+class ConnectionPool(object):
+    """
+    Common ancestor of every connection pool in this module, such as
+    :class:`.HTTPConnectionPool` and :class:`.HTTPSConnectionPool`.
+
+    It defines no behaviour of its own.
+    """
+class HTTPConnectionPool(ConnectionPool, RequestMethods):
+    """
+    Thread-safe connection pool for one host.
+
+    :param host:
+        Host used for this HTTP Connection (e.g. "localhost"), passed into
+        :class:`httplib.HTTPConnection`.
+    :param port:
+        Port used for this HTTP Connection (None is equivalent to 80), passed
+        into :class:`httplib.HTTPConnection`.
+    :param strict:
+        Causes BadStatusLine to be raised if the status line can't be parsed
+        as a valid HTTP/1.0 or 1.1 status line, passed into
+        :class:`httplib.HTTPConnection`.
+    :param timeout:
+        Socket timeout for each individual connection, can be a float. None
+        disables timeout.
+    :param maxsize:
+        Number of connections to save that can be reused. More than 1 is useful
+        in multithreaded situations. If ``block`` is set to false, more
+        connections will be created but they will not be saved once they've
+        been used.
+    :param block:
+        If set to True, no more than ``maxsize`` connections will be used at
+        a time. When no free connections are available, the call will block
+        until a connection has been released. This is a useful side effect for
+        particular multithreaded situations where one does not want to use more
+        than maxsize connections per host to prevent flooding.
+    :param headers:
+        Headers to include with all requests, unless other headers are given
+        explicitly.
+    """
+    scheme = 'http'
+
+    def __init__(self, host, port=None, strict=False, timeout=None, maxsize=1,
+                 block=False, headers=None):
+        self.host = host
+        self.port = port
+        self.strict = strict
+        self.timeout = timeout
+        self.pool = Queue(maxsize)
+        self.block = block
+        self.headers = headers or {}
+        # Fill the queue up so that doing get() on it will block properly.
+        # Each ``None`` is an empty slot that is turned into a real
+        # connection on demand by _get_conn().
+        for _ in xrange(maxsize):
+            self.pool.put(None)
+        # These are mostly for testing and debugging purposes.
+        self.num_connections = 0
+        self.num_requests = 0
+
+    def _new_conn(self):
+        """
+        Return a fresh :class:`httplib.HTTPConnection`.
+        """
+        self.num_connections += 1
+        log.info("Starting new HTTP connection (%d): %s" %
+                 (self.num_connections, self.host))
+        # ``strict`` is documented as being handed to httplib; pass it along
+        # instead of silently dropping it.
+        return HTTPConnection(host=self.host, port=self.port,
+                              strict=self.strict)
+
+    def _get_conn(self, timeout=None):
+        """
+        Get a connection. Will return a pooled connection if one is available.
+        If no connections are available and :prop:`.block` is ``False``, then a
+        fresh connection is returned.
+
+        :param timeout:
+            Seconds to wait before giving up and raising
+            :class:`urllib3.exceptions.EmptyPoolError` if the pool is empty and
+            :prop:`.block` is ``True``.
+        """
+        conn = None
+        try:
+            conn = self.pool.get(block=self.block, timeout=timeout)
+            # If this is a persistent connection, check if it got disconnected
+            if conn and conn.sock and select([conn.sock], [], [], 0.0)[0]:
+                # Either data is buffered (bad), or the connection is dropped.
+                log.info("Resetting dropped connection: %s" % self.host)
+                conn.close()
+        except Empty:
+            if self.block:
+                raise EmptyPoolError("Pool reached maximum size and no more "
+                                     "connections are allowed.")
+            # Oh well, we'll create a new connection then
+        return conn or self._new_conn()
+
+    def _put_conn(self, conn):
+        """
+        Put a connection back into the pool.
+
+        :param conn:
+            Connection object for the current host and port as returned by
+            :meth:`._new_conn` or :meth:`._get_conn`.
+
+        If the pool is already full, the connection is discarded because we
+        exceeded maxsize. If connections are discarded frequently, then maxsize
+        should be increased.
+        """
+        try:
+            self.pool.put(conn, block=False)
+        except Full:
+            # This should never happen if self.block == True
+            log.warning("HttpConnectionPool is full, discarding connection: %s"
+                        % self.host)
+
+    def _make_request(self, conn, method, url, timeout=_Default,
+                      **httplib_request_kw):
+        """
+        Perform a request on a given httplib connection object taken from our
+        pool.
+
+        :param conn:
+            Connection to send the request on.
+        :param timeout:
+            Socket timeout for reading the response; defaults to the pool's
+            ``timeout`` when not given.
+        :param \**httplib_request_kw:
+            Passed straight to ``conn.request`` (e.g. ``body``, ``headers``).
+        :returns: the raw httplib response object.
+        """
+        self.num_requests += 1
+        if timeout is _Default:
+            timeout = self.timeout
+        conn.request(method, url, **httplib_request_kw)
+        # The socket only exists once request() has connected, so the timeout
+        # is applied here and covers waiting for the response.
+        conn.sock.settimeout(timeout)
+        httplib_response = conn.getresponse()
+        log.debug("\"%s %s %s\" %s %s" %
+                  (method, url,
+                   conn._http_vsn_str,  # pylint: disable-msg=W0212
+                   httplib_response.status, httplib_response.length))
+        return httplib_response
+
+    def is_same_host(self, url):
+        """
+        Check if the given ``url`` is a member of the same host as this
+        connection pool. Paths starting with ``/`` always count as same-host.
+        """
+        # TODO: Add optional support for socket.gethostbyname checking.
+        return (url.startswith('/') or
+                get_host(url) == (self.scheme, self.host, self.port))
+
+    def urlopen(self, method, url, body=None, headers=None, retries=3,
+                redirect=True, assert_same_host=True, timeout=_Default,
+                pool_timeout=None, release_conn=None, **response_kw):
+        """
+        Get a connection from the pool and perform an HTTP request. This is the
+        lowest level call for making a request, so you'll need to specify all
+        the raw details.
+
+        .. note::
+           More commonly, it's appropriate to use a convenience method provided
+           by :class:`.RequestMethods`, such as :meth:`.request`.
+
+        :param method:
+            HTTP request method (such as GET, POST, PUT, etc.)
+        :param url:
+            URL or absolute path to request.
+        :param body:
+            Data to send in the request body (useful for creating
+            POST requests, see HTTPConnectionPool.post_url for
+            more convenience).
+        :param headers:
+            Dictionary of custom headers to send, such as User-Agent,
+            If-None-Match, etc. If None, pool headers are used. If provided,
+            these headers completely replace any pool-specific headers.
+        :param retries:
+            Number of retries to allow before raising a MaxRetryError exception.
+        :param redirect:
+            Automatically handle redirects (status codes 301, 302, 303, 307),
+            each redirect counts as a retry.
+        :param assert_same_host:
+            If ``True``, will make sure that the host of the pool requests is
+            consistent else will raise HostChangedError. When False, you can
+            use the pool on an HTTP proxy and request foreign hosts.
+        :param timeout:
+            If specified, overrides the default timeout for this one request.
+        :param pool_timeout:
+            If set and the pool is set to block=True, then this method will
+            block for ``pool_timeout`` seconds and raise EmptyPoolError if no
+            connection is available within the time period.
+        :param release_conn:
+            If False, then the urlopen call will not release the connection
+            back into the pool once a response is received. This is useful if
+            you're not preloading the response's content immediately. You will
+            need to call ``r.release_conn()`` on the response ``r`` to return
+            the connection back into the pool. If None, it takes the value of
+            ``response_kw.get('preload_content', True)``.
+        :param \**response_kw:
+            Additional parameters are passed to
+            :meth:`urllib3.response.HTTPResponse.from_httplib`
+
+        :raises MaxRetryError: when ``retries`` drops below zero.
+        :raises HostChangedError: when ``url`` points at a foreign host and
+            ``assert_same_host`` is true.
+        :raises TimeoutError: when the socket or the pool queue times out.
+        :raises SSLError: on an SSL error from the underlying socket.
+        """
+        if headers is None:
+            headers = self.headers
+        if retries < 0:
+            raise MaxRetryError("Max retries exceeded for url: %s" % url)
+        if timeout is _Default:
+            # Resolve once so error messages and retries see the same value.
+            timeout = self.timeout
+        if release_conn is None:
+            release_conn = response_kw.get('preload_content', True)
+        # Check host
+        if assert_same_host and not self.is_same_host(url):
+            host = "%s://%s" % (self.scheme, self.host)
+            if self.port:
+                host = "%s:%d" % (host, self.port)
+            raise HostChangedError("Connection pool with host '%s' tried to "
+                                   "open a foreign host: %s" % (host, url))
+        conn = None
+        err = None
+        try:
+            # Request a connection from the queue
+            # (Could raise SocketError: Bad file descriptor)
+            conn = self._get_conn(timeout=pool_timeout)
+            # Make the request on the httplib connection object
+            httplib_response = self._make_request(conn, method, url,
+                                                  timeout=timeout,
+                                                  body=body, headers=headers)
+            # If we're going to release the connection in ``finally:``, then
+            # the request doesn't need to know about the connection. Otherwise
+            # it will also try to release it and we'll have a double-release
+            # mess.
+            response_conn = not release_conn and conn
+            # Import httplib's response into our own wrapper object
+            response = HTTPResponse.from_httplib(httplib_response,
+                                                 pool=self,
+                                                 connection=response_conn,
+                                                 **response_kw)
+            # else:
+            #     The connection will be put back into the pool when
+            #     ``response.release_conn()`` is called (implicitly by
+            #     ``response.read()``)
+        except (SocketTimeout, Empty):
+            # Timed out either by socket or queue
+            raise TimeoutError("Request timed out after %s seconds" %
+                               timeout)
+        except BaseSSLError as e:
+            # SSL certificate error
+            raise SSLError(e)
+        except (HTTPException, SocketError) as e:
+            # Connection broken, discard. Keep the error in ``err`` because
+            # it is reported after this block has finished.
+            err = e
+            if conn is not None:
+                conn.close()
+                conn = None
+            # Hand the slot back as an empty one so a blocking pool does not
+            # run dry; it will be replaced by the next _get_conn().
+            try:
+                self.pool.put(None, block=False)
+            except Full:
+                pass  # Connection was an overflow one; nothing to give back.
+        finally:
+            if conn and release_conn:
+                # Put the connection back to be reused
+                self._put_conn(conn)
+        if conn is None:
+            log.warning("Retrying (%d attempts remain) after connection "
+                        "broken by '%r': %s" % (retries, err, url))
+            # Try again with exactly the options the caller asked for.
+            return self.urlopen(method, url, body, headers, retries - 1,
+                                redirect, assert_same_host,
+                                timeout=timeout, pool_timeout=pool_timeout,
+                                release_conn=release_conn, **response_kw)
+        # Handle redirection
+        if (redirect and
+                response.status in [301, 302, 303, 307] and
+                'location' in response.headers):  # Redirect, retry
+            log.info("Redirecting %s -> %s" %
+                     (url, response.headers.get('location')))
+            return self.urlopen(method, response.headers.get('location'), body,
+                                headers, retries - 1, redirect,
+                                assert_same_host,
+                                timeout=timeout, pool_timeout=pool_timeout,
+                                release_conn=release_conn, **response_kw)
+        return response
+class HTTPSConnectionPool(HTTPConnectionPool):
+    """
+    Same as :class:`.HTTPConnectionPool`, but HTTPS.
+
+    When Python is compiled with the :mod:`ssl` module, then
+    :class:`.VerifiedHTTPSConnection` is used, which *can* verify certificates,
+    instead of :class:`httplib.HTTPSConnection`.
+
+    The ``key_file``, ``cert_file``, ``cert_reqs``, and ``ca_certs`` parameters
+    are only used if :mod:`ssl` is available and are fed into
+    :meth:`ssl.wrap_socket` to upgrade the connection socket into an SSL socket.
+    """
+    scheme = 'https'
+
+    def __init__(self, host, port=None,
+                 strict=False, timeout=None, maxsize=1,
+                 block=False, headers=None,
+                 key_file=None, cert_file=None,
+                 cert_reqs='CERT_NONE', ca_certs=None):
+        super(HTTPSConnectionPool, self).__init__(host, port,
+                                                  strict, timeout, maxsize,
+                                                  block, headers)
+        self.key_file = key_file
+        self.cert_file = cert_file
+        self.cert_reqs = cert_reqs
+        self.ca_certs = ca_certs
+
+    def _new_conn(self):
+        """
+        Return a fresh :class:`httplib.HTTPSConnection`.
+        """
+        self.num_connections += 1
+        log.info("Starting new HTTPS connection (%d): %s"
+                 % (self.num_connections, self.host))
+        # ``strict`` is accepted by the constructor, so forward it to the
+        # connection rather than ignoring it.
+        if not ssl:
+            return HTTPSConnection(host=self.host, port=self.port,
+                                   strict=self.strict)
+        connection = VerifiedHTTPSConnection(host=self.host, port=self.port,
+                                             strict=self.strict)
+        connection.set_cert(key_file=self.key_file, cert_file=self.cert_file,
+                            cert_reqs=self.cert_reqs, ca_certs=self.ca_certs)
+        return connection
+## Helpers
+def make_headers(keep_alive=None, accept_encoding=None, user_agent=None,
+                 basic_auth=None):
+    """
+    Shortcuts for generating request headers.
+
+    :param keep_alive:
+        If ``True``, adds 'connection: keep-alive' header.
+    :param accept_encoding:
+        Can be a boolean, list, tuple, or string.
+        ``True`` translates to 'gzip,deflate'.
+        List or tuple will get joined by comma.
+        String will be used as provided.
+    :param user_agent:
+        String representing the user-agent you want, such as
+        "python-urllib3/0.6"
+    :param basic_auth:
+        Colon-separated username:password string for 'authorization: basic ...'
+        auth header.
+
+    :returns: dict of lower-cased header names to values.
+
+    Example: ::
+
+        >>> make_headers(keep_alive=True, user_agent="Batman/1.0")
+        {'connection': 'keep-alive', 'user-agent': 'Batman/1.0'}
+        >>> make_headers(accept_encoding=True)
+        {'accept-encoding': 'gzip,deflate'}
+    """
+    try:
+        string_types = basestring  # Python 2: matches str and unicode
+    except NameError:
+        string_types = str
+    headers = {}
+    if accept_encoding:
+        if isinstance(accept_encoding, string_types):
+            pass
+        elif isinstance(accept_encoding, (list, tuple)):
+            accept_encoding = ','.join(accept_encoding)
+        else:
+            accept_encoding = 'gzip,deflate'
+        headers['accept-encoding'] = accept_encoding
+    if user_agent:
+        headers['user-agent'] = user_agent
+    if keep_alive:
+        headers['connection'] = 'keep-alive'
+    if basic_auth:
+        # b64encode never inserts line breaks, unlike the 'base64' codec
+        # which wraps every 76 characters and would corrupt the header.
+        credentials = basic_auth
+        if not isinstance(credentials, bytes):
+            credentials = credentials.encode('utf-8')
+        token = base64.b64encode(credentials)
+        if not isinstance(token, str):
+            token = token.decode('ascii')
+        headers['authorization'] = 'Basic ' + token
+    return headers
+def get_host(url):
+    """
+    Given a url, return its scheme, host and port (None if it's not there).
+
+    The scheme defaults to ``'http'`` when the url does not carry one. A
+    ``://`` that appears after the first ``/`` (e.g. inside a query string)
+    is not treated as a scheme separator, and a leading ``//`` is skipped.
+
+    For example: ::
+
+        >>> get_host('http://google.com/mail/')
+        ('http', 'google.com', None)
+        >>> get_host('google.com:80')
+        ('http', 'google.com', 80)
+
+    :raises ValueError: if the port is present but not an integer.
+    """
+    # This code is actually similar to urlparse.urlsplit, but much
+    # simplified for our needs.
+    port = None
+    scheme = 'http'
+    prefix, separator, remainder = url.partition('://')
+    if separator and '/' not in prefix:
+        scheme, url = prefix, remainder
+    elif url.startswith('//'):
+        # Scheme-relative url: keep the default scheme.
+        url = url[2:]
+    if '/' in url:
+        url, _path = url.split('/', 1)
+    if ':' in url:
+        url, port = url.split(':', 1)
+        # "host:" carries an empty port, which means "no port given".
+        port = int(port) if port else None
+    return scheme, url, port
+def connection_from_url(url, **kw):
+    """
+    Build the :class:`.ConnectionPool` that matches the host of ``url``.
+
+    Saves the caller from splitting the url into scheme, host and port
+    before constructing a pool by hand.
+
+    :param url:
+        Absolute URL string that must include the scheme. Port is optional.
+    :param \**kw:
+        Forwarded to the constructor of the selected
+        :class:`.ConnectionPool` (timeout, maxsize, headers, etc.).
+
+    :returns:
+        An :class:`.HTTPSConnectionPool` when the scheme is ``https``,
+        otherwise an :class:`.HTTPConnectionPool`.
+
+    Example: ::
+
+        >>> conn = connection_from_url('http://google.com/')
+        >>> r = conn.request('GET', '/')
+    """
+    scheme, host, port = get_host(url)
+    pool_cls = HTTPSConnectionPool if scheme == 'https' else HTTPConnectionPool
+    return pool_cls(host, port=port, **kw)