aboutsummaryrefslogtreecommitdiff
path: root/urllib3/connectionpool.py
diff options
context:
space:
mode:
Diffstat (limited to 'urllib3/connectionpool.py')
-rw-r--r--urllib3/connectionpool.py629
1 files changed, 629 insertions, 0 deletions
diff --git a/urllib3/connectionpool.py b/urllib3/connectionpool.py
new file mode 100644
index 0000000..39e652e
--- /dev/null
+++ b/urllib3/connectionpool.py
@@ -0,0 +1,629 @@
+# urllib3/connectionpool.py
+# Copyright 2008-2012 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
+#
+# This module is part of urllib3 and is released under
+# the MIT License: http://www.opensource.org/licenses/mit-license.php
+
+import logging
+import socket
+
+from base64 import b64encode
+from socket import error as SocketError, timeout as SocketTimeout
+
+try:
+ from select import poll, POLLIN
+except ImportError: # Doesn't exist on OSX and other platforms
+ from select import select
+ poll = False
+
+try: # Python 3
+ from http.client import HTTPConnection, HTTPException
+ from http.client import HTTP_PORT, HTTPS_PORT
+except ImportError:
+ from httplib import HTTPConnection, HTTPException
+ from httplib import HTTP_PORT, HTTPS_PORT
+
+try: # Python 3
+ from queue import LifoQueue, Empty, Full
+except ImportError:
+ from Queue import LifoQueue, Empty, Full
+
+
+try: # Compiled with SSL?
+ HTTPSConnection = object
+ BaseSSLError = None
+ ssl = None
+
+ try: # Python 3
+ from http.client import HTTPSConnection
+ except ImportError:
+ from httplib import HTTPSConnection
+
+ import ssl
+ BaseSSLError = ssl.SSLError
+
+except ImportError:
+ pass
+
+
+from .packages.ssl_match_hostname import match_hostname, CertificateError
+from .request import RequestMethods
+from .response import HTTPResponse
+from .exceptions import (
+ EmptyPoolError,
+ HostChangedError,
+ LocationParseError,
+ MaxRetryError,
+ SSLError,
+ TimeoutError,
+)
+
+from .packages.ssl_match_hostname import match_hostname, CertificateError
+from .packages import six
+
+xrange = six.moves.xrange
+
+log = logging.getLogger(__name__)
+
+_Default = object()
+
+port_by_scheme = {
+ 'http': HTTP_PORT,
+ 'https': HTTPS_PORT,
+}
+
+## Connection objects (extension of httplib)
+
+class VerifiedHTTPSConnection(HTTPSConnection):
+ """
+ Based on httplib.HTTPSConnection but wraps the socket with
+ SSL certification.
+ """
+ cert_reqs = None
+ ca_certs = None
+
+ def set_cert(self, key_file=None, cert_file=None,
+ cert_reqs='CERT_NONE', ca_certs=None):
+ ssl_req_scheme = {
+ 'CERT_NONE': ssl.CERT_NONE,
+ 'CERT_OPTIONAL': ssl.CERT_OPTIONAL,
+ 'CERT_REQUIRED': ssl.CERT_REQUIRED
+ }
+
+ self.key_file = key_file
+ self.cert_file = cert_file
+ self.cert_reqs = ssl_req_scheme.get(cert_reqs) or ssl.CERT_NONE
+ self.ca_certs = ca_certs
+
+ def connect(self):
+ # Add certificate verification
+ sock = socket.create_connection((self.host, self.port), self.timeout)
+
+ # Wrap socket using verification with the root certs in
+ # trusted_root_certs
+ self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file,
+ cert_reqs=self.cert_reqs,
+ ca_certs=self.ca_certs)
+ if self.ca_certs:
+ match_hostname(self.sock.getpeercert(), self.host)
+
+## Pool objects
+
+class ConnectionPool(object):
+ """
+ Base class for all connection pools, such as
+ :class:`.HTTPConnectionPool` and :class:`.HTTPSConnectionPool`.
+ """
+
+ scheme = None
+ QueueCls = LifoQueue
+
+ def __init__(self, host, port=None):
+ self.host = host
+ self.port = port
+
+ def __str__(self):
+ return '%s(host=%r, port=%r)' % (type(self).__name__,
+ self.host, self.port)
+
+
+class HTTPConnectionPool(ConnectionPool, RequestMethods):
+ """
+ Thread-safe connection pool for one host.
+
+ :param host:
+ Host used for this HTTP Connection (e.g. "localhost"), passed into
+ :class:`httplib.HTTPConnection`.
+
+ :param port:
+ Port used for this HTTP Connection (None is equivalent to 80), passed
+ into :class:`httplib.HTTPConnection`.
+
+ :param strict:
+ Causes BadStatusLine to be raised if the status line can't be parsed
+ as a valid HTTP/1.0 or 1.1 status line, passed into
+ :class:`httplib.HTTPConnection`.
+
+ :param timeout:
+ Socket timeout for each individual connection, can be a float. None
+ disables timeout.
+
+ :param maxsize:
+ Number of connections to save that can be reused. More than 1 is useful
+ in multithreaded situations. If ``block`` is set to false, more
+ connections will be created but they will not be saved once they've
+ been used.
+
+ :param block:
+ If set to True, no more than ``maxsize`` connections will be used at
+ a time. When no free connections are available, the call will block
+ until a connection has been released. This is a useful side effect for
+ particular multithreaded situations where one does not want to use more
+ than maxsize connections per host to prevent flooding.
+
+ :param headers:
+ Headers to include with all requests, unless other headers are given
+ explicitly.
+ """
+
+ scheme = 'http'
+
+ def __init__(self, host, port=None, strict=False, timeout=None, maxsize=1,
+ block=False, headers=None):
+ super(HTTPConnectionPool, self).__init__(host, port)
+
+ self.strict = strict
+ self.timeout = timeout
+ self.pool = self.QueueCls(maxsize)
+ self.block = block
+ self.headers = headers or {}
+
+ # Fill the queue up so that doing get() on it will block properly
+ for _ in xrange(maxsize):
+ self.pool.put(None)
+
+ # These are mostly for testing and debugging purposes.
+ self.num_connections = 0
+ self.num_requests = 0
+
+ def _new_conn(self):
+ """
+ Return a fresh :class:`httplib.HTTPConnection`.
+ """
+ self.num_connections += 1
+ log.info("Starting new HTTP connection (%d): %s" %
+ (self.num_connections, self.host))
+ return HTTPConnection(host=self.host, port=self.port)
+
+ def _get_conn(self, timeout=None):
+ """
+ Get a connection. Will return a pooled connection if one is available.
+
+ If no connections are available and :prop:`.block` is ``False``, then a
+ fresh connection is returned.
+
+ :param timeout:
+ Seconds to wait before giving up and raising
+ :class:`urllib3.exceptions.EmptyPoolError` if the pool is empty and
+ :prop:`.block` is ``True``.
+ """
+ conn = None
+ try:
+ conn = self.pool.get(block=self.block, timeout=timeout)
+
+ # If this is a persistent connection, check if it got disconnected
+ if conn and conn.sock and is_connection_dropped(conn):
+ log.info("Resetting dropped connection: %s" % self.host)
+ conn.close()
+
+ except Empty:
+ if self.block:
+ raise EmptyPoolError(self,
+ "Pool reached maximum size and no more "
+ "connections are allowed.")
+ pass # Oh well, we'll create a new connection then
+
+ return conn or self._new_conn()
+
+ def _put_conn(self, conn):
+ """
+ Put a connection back into the pool.
+
+ :param conn:
+ Connection object for the current host and port as returned by
+ :meth:`._new_conn` or :meth:`._get_conn`.
+
+ If the pool is already full, the connection is discarded because we
+ exceeded maxsize. If connections are discarded frequently, then maxsize
+ should be increased.
+ """
+ try:
+ self.pool.put(conn, block=False)
+ except Full:
+ # This should never happen if self.block == True
+ log.warning("HttpConnectionPool is full, discarding connection: %s"
+ % self.host)
+
+ def _make_request(self, conn, method, url, timeout=_Default,
+ **httplib_request_kw):
+ """
+ Perform a request on a given httplib connection object taken from our
+ pool.
+ """
+ self.num_requests += 1
+
+ if timeout is _Default:
+ timeout = self.timeout
+
+ conn.timeout = timeout # This only does anything in Py26+
+
+ conn.request(method, url, **httplib_request_kw)
+ conn.sock.settimeout(timeout)
+ httplib_response = conn.getresponse()
+
+ log.debug("\"%s %s %s\" %s %s" %
+ (method, url,
+ conn._http_vsn_str, # pylint: disable-msg=W0212
+ httplib_response.status, httplib_response.length))
+
+ return httplib_response
+
+
+ def is_same_host(self, url):
+ """
+ Check if the given ``url`` is a member of the same host as this
+ connection pool.
+ """
+ # TODO: Add optional support for socket.gethostbyname checking.
+ scheme, host, port = get_host(url)
+
+ if self.port and not port:
+ # Use explicit default port for comparison when none is given.
+ port = port_by_scheme.get(scheme)
+
+ return (url.startswith('/') or
+ (scheme, host, port) == (self.scheme, self.host, self.port))
+
+ def urlopen(self, method, url, body=None, headers=None, retries=3,
+ redirect=True, assert_same_host=True, timeout=_Default,
+ pool_timeout=None, release_conn=None, **response_kw):
+ """
+ Get a connection from the pool and perform an HTTP request. This is the
+ lowest level call for making a request, so you'll need to specify all
+ the raw details.
+
+ .. note::
+
+ More commonly, it's appropriate to use a convenience method provided
+ by :class:`.RequestMethods`, such as :meth:`.request`.
+
+ .. note::
+
+ `release_conn` will only behave as expected if
+ `preload_content=False` because we want to make
+ `preload_content=False` the default behaviour someday soon without
+ breaking backwards compatibility.
+
+ :param method:
+ HTTP request method (such as GET, POST, PUT, etc.)
+
+ :param body:
+ Data to send in the request body (useful for creating
+ POST requests, see HTTPConnectionPool.post_url for
+ more convenience).
+
+ :param headers:
+ Dictionary of custom headers to send, such as User-Agent,
+ If-None-Match, etc. If None, pool headers are used. If provided,
+ these headers completely replace any pool-specific headers.
+
+ :param retries:
+ Number of retries to allow before raising a MaxRetryError exception.
+
+ :param redirect:
+ Automatically handle redirects (status codes 301, 302, 303, 307),
+ each redirect counts as a retry.
+
+ :param assert_same_host:
+ If ``True``, will make sure that the host of the pool requests is
+ consistent else will raise HostChangedError. When False, you can
+ use the pool on an HTTP proxy and request foreign hosts.
+
+ :param timeout:
+ If specified, overrides the default timeout for this one request.
+
+ :param pool_timeout:
+ If set and the pool is set to block=True, then this method will
+ block for ``pool_timeout`` seconds and raise EmptyPoolError if no
+ connection is available within the time period.
+
+ :param release_conn:
+ If False, then the urlopen call will not release the connection
+ back into the pool once a response is received (but will release if
+ you read the entire contents of the response such as when
+ `preload_content=True`). This is useful if you're not preloading
+ the response's content immediately. You will need to call
+ ``r.release_conn()`` on the response ``r`` to return the connection
+ back into the pool. If None, it takes the value of
+ ``response_kw.get('preload_content', True)``.
+
+ :param \**response_kw:
+ Additional parameters are passed to
+ :meth:`urllib3.response.HTTPResponse.from_httplib`
+ """
+ if headers is None:
+ headers = self.headers
+
+ if retries < 0:
+ raise MaxRetryError(self, url)
+
+ if timeout is _Default:
+ timeout = self.timeout
+
+ if release_conn is None:
+ release_conn = response_kw.get('preload_content', True)
+
+ # Check host
+ if assert_same_host and not self.is_same_host(url):
+ host = "%s://%s" % (self.scheme, self.host)
+ if self.port:
+ host = "%s:%d" % (host, self.port)
+
+ raise HostChangedError(self, url, retries - 1)
+
+ conn = None
+
+ try:
+ # Request a connection from the queue
+ # (Could raise SocketError: Bad file descriptor)
+ conn = self._get_conn(timeout=pool_timeout)
+
+ # Make the request on the httplib connection object
+ httplib_response = self._make_request(conn, method, url,
+ timeout=timeout,
+ body=body, headers=headers)
+
+ # If we're going to release the connection in ``finally:``, then
+ # the request doesn't need to know about the connection. Otherwise
+ # it will also try to release it and we'll have a double-release
+ # mess.
+ response_conn = not release_conn and conn
+
+ # Import httplib's response into our own wrapper object
+ response = HTTPResponse.from_httplib(httplib_response,
+ pool=self,
+ connection=response_conn,
+ **response_kw)
+
+ # else:
+ # The connection will be put back into the pool when
+ # ``response.release_conn()`` is called (implicitly by
+ # ``response.read()``)
+
+ except Empty as e:
+ # Timed out by queue
+ raise TimeoutError(self, "Request timed out. (pool_timeout=%s)" %
+ pool_timeout)
+
+ except SocketTimeout as e:
+ # Timed out by socket
+ raise TimeoutError(self, "Request timed out. (timeout=%s)" %
+ timeout)
+
+ except BaseSSLError as e:
+ # SSL certificate error
+ raise SSLError(e)
+
+ except CertificateError as e:
+ # Name mismatch
+ raise SSLError(e)
+
+ except (HTTPException, SocketError) as e:
+ # Connection broken, discard. It will be replaced next _get_conn().
+ conn = None
+ # This is necessary so we can access e below
+ err = e
+
+ finally:
+ if conn and release_conn:
+ # Put the connection back to be reused
+ self._put_conn(conn)
+
+ if not conn:
+ log.warn("Retrying (%d attempts remain) after connection "
+ "broken by '%r': %s" % (retries, err, url))
+ return self.urlopen(method, url, body, headers, retries - 1,
+ redirect, assert_same_host) # Try again
+
+ # Handle redirect?
+ redirect_location = redirect and response.get_redirect_location()
+ if redirect_location:
+ log.info("Redirecting %s -> %s" % (url, redirect_location))
+ return self.urlopen(method, redirect_location, body, headers,
+ retries - 1, redirect, assert_same_host)
+
+ return response
+
+
+class HTTPSConnectionPool(HTTPConnectionPool):
+ """
+ Same as :class:`.HTTPConnectionPool`, but HTTPS.
+
+ When Python is compiled with the :mod:`ssl` module, then
+ :class:`.VerifiedHTTPSConnection` is used, which *can* verify certificates,
+ instead of :class:httplib.HTTPSConnection`.
+
+ The ``key_file``, ``cert_file``, ``cert_reqs``, and ``ca_certs`` parameters
+ are only used if :mod:`ssl` is available and are fed into
+ :meth:`ssl.wrap_socket` to upgrade the connection socket into an SSL socket.
+ """
+
+ scheme = 'https'
+
+ def __init__(self, host, port=None,
+ strict=False, timeout=None, maxsize=1,
+ block=False, headers=None,
+ key_file=None, cert_file=None,
+ cert_reqs='CERT_NONE', ca_certs=None):
+
+ super(HTTPSConnectionPool, self).__init__(host, port,
+ strict, timeout, maxsize,
+ block, headers)
+ self.key_file = key_file
+ self.cert_file = cert_file
+ self.cert_reqs = cert_reqs
+ self.ca_certs = ca_certs
+
+ def _new_conn(self):
+ """
+ Return a fresh :class:`httplib.HTTPSConnection`.
+ """
+ self.num_connections += 1
+ log.info("Starting new HTTPS connection (%d): %s"
+ % (self.num_connections, self.host))
+
+ if not ssl: # Platform-specific: Python compiled without +ssl
+ if not HTTPSConnection or HTTPSConnection is object:
+ raise SSLError("Can't connect to HTTPS URL because the SSL "
+ "module is not available.")
+
+ return HTTPSConnection(host=self.host, port=self.port)
+
+ connection = VerifiedHTTPSConnection(host=self.host, port=self.port)
+ connection.set_cert(key_file=self.key_file, cert_file=self.cert_file,
+ cert_reqs=self.cert_reqs, ca_certs=self.ca_certs)
+ return connection
+
+
+## Helpers
+
+def make_headers(keep_alive=None, accept_encoding=None, user_agent=None,
+ basic_auth=None):
+ """
+ Shortcuts for generating request headers.
+
+ :param keep_alive:
+ If ``True``, adds 'connection: keep-alive' header.
+
+ :param accept_encoding:
+ Can be a boolean, list, or string.
+ ``True`` translates to 'gzip,deflate'.
+ List will get joined by comma.
+ String will be used as provided.
+
+ :param user_agent:
+ String representing the user-agent you want, such as
+ "python-urllib3/0.6"
+
+ :param basic_auth:
+ Colon-separated username:password string for 'authorization: basic ...'
+ auth header.
+
+ Example: ::
+
+ >>> make_headers(keep_alive=True, user_agent="Batman/1.0")
+ {'connection': 'keep-alive', 'user-agent': 'Batman/1.0'}
+ >>> make_headers(accept_encoding=True)
+ {'accept-encoding': 'gzip,deflate'}
+ """
+ headers = {}
+ if accept_encoding:
+ if isinstance(accept_encoding, str):
+ pass
+ elif isinstance(accept_encoding, list):
+ accept_encoding = ','.join(accept_encoding)
+ else:
+ accept_encoding = 'gzip,deflate'
+ headers['accept-encoding'] = accept_encoding
+
+ if user_agent:
+ headers['user-agent'] = user_agent
+
+ if keep_alive:
+ headers['connection'] = 'keep-alive'
+
+ if basic_auth:
+ headers['authorization'] = 'Basic ' + \
+ b64encode(six.b(basic_auth)).decode('utf-8')
+
+ return headers
+
+
+def get_host(url):
+ """
+ Given a url, return its scheme, host and port (None if it's not there).
+
+ For example: ::
+
+ >>> get_host('http://google.com/mail/')
+ ('http', 'google.com', None)
+ >>> get_host('google.com:80')
+ ('http', 'google.com', 80)
+ """
+
+ # This code is actually similar to urlparse.urlsplit, but much
+ # simplified for our needs.
+ port = None
+ scheme = 'http'
+
+ if '://' in url:
+ scheme, url = url.split('://', 1)
+ if '/' in url:
+ url, _path = url.split('/', 1)
+ if '@' in url:
+ _auth, url = url.split('@', 1)
+ if ':' in url:
+ url, port = url.split(':', 1)
+
+ if not port.isdigit():
+ raise LocationParseError("Failed to parse: %s")
+
+ port = int(port)
+
+ return scheme, url, port
+
+
+def connection_from_url(url, **kw):
+ """
+ Given a url, return an :class:`.ConnectionPool` instance of its host.
+
+ This is a shortcut for not having to parse out the scheme, host, and port
+ of the url before creating an :class:`.ConnectionPool` instance.
+
+ :param url:
+ Absolute URL string that must include the scheme. Port is optional.
+
+ :param \**kw:
+ Passes additional parameters to the constructor of the appropriate
+ :class:`.ConnectionPool`. Useful for specifying things like
+ timeout, maxsize, headers, etc.
+
+ Example: ::
+
+ >>> conn = connection_from_url('http://google.com/')
+ >>> r = conn.request('GET', '/')
+ """
+ scheme, host, port = get_host(url)
+ if scheme == 'https':
+ return HTTPSConnectionPool(host, port=port, **kw)
+ else:
+ return HTTPConnectionPool(host, port=port, **kw)
+
+
+def is_connection_dropped(conn):
+ """
+ Returns True if the connection is dropped and should be closed.
+
+ :param conn:
+ ``HTTPConnection`` object.
+ """
+ if not poll: # Platform-specific
+ return select([conn.sock], [], [], 0.0)[0]
+
+ # This version is better on platforms that support it.
+ p = poll()
+ p.register(conn.sock, POLLIN)
+ for (fno, ev) in p.poll(0.0):
+ if fno == conn.sock.fileno():
+ # Either data is buffered (bad), or the connection is dropped.
+ return True