diff options
Diffstat (limited to 'requests')
33 files changed, 1176 insertions, 704 deletions
diff --git a/requests/__init__.py b/requests/__init__.py index bba1900..d5e1956 100644 --- a/requests/__init__.py +++ b/requests/__init__.py @@ -13,7 +13,7 @@ Requests is an HTTP library, written in Python, for human beings. Basic GET usage: >>> import requests - >>> r = requests.get('http://python.org') + >>> r = requests.get('https://www.python.org') >>> r.status_code 200 >>> 'Python is a programming language' in r.content @@ -22,7 +22,7 @@ usage: ... or POST: >>> payload = dict(key1='value1', key2='value2') - >>> r = requests.post("http://httpbin.org/post", data=payload) + >>> r = requests.post('http://httpbin.org/post', data=payload) >>> print(r.text) { ... @@ -42,8 +42,8 @@ is at <http://python-requests.org>. """ __title__ = 'requests' -__version__ = '2.3.0' -__build__ = 0x020300 +__version__ = '2.4.3' +__build__ = 0x020403 __author__ = 'Kenneth Reitz' __license__ = 'Apache 2.0' __copyright__ = 'Copyright 2014 Kenneth Reitz' diff --git a/requests/adapters.py b/requests/adapters.py index eb7a2d2..abb25d1 100644 --- a/requests/adapters.py +++ b/requests/adapters.py @@ -11,20 +11,24 @@ and maintain connections. 
import socket from .models import Response +from .packages.urllib3 import Retry from .packages.urllib3.poolmanager import PoolManager, proxy_from_url from .packages.urllib3.response import HTTPResponse from .packages.urllib3.util import Timeout as TimeoutSauce -from .compat import urlparse, basestring, urldefrag, unquote +from .compat import urlparse, basestring from .utils import (DEFAULT_CA_BUNDLE_PATH, get_encoding_from_headers, - prepend_scheme_if_needed, get_auth_from_url) + prepend_scheme_if_needed, get_auth_from_url, urldefragauth) from .structures import CaseInsensitiveDict -from .packages.urllib3.exceptions import MaxRetryError -from .packages.urllib3.exceptions import TimeoutError -from .packages.urllib3.exceptions import SSLError as _SSLError +from .packages.urllib3.exceptions import ConnectTimeoutError from .packages.urllib3.exceptions import HTTPError as _HTTPError +from .packages.urllib3.exceptions import MaxRetryError from .packages.urllib3.exceptions import ProxyError as _ProxyError +from .packages.urllib3.exceptions import ProtocolError +from .packages.urllib3.exceptions import ReadTimeoutError +from .packages.urllib3.exceptions import SSLError as _SSLError from .cookies import extract_cookies_to_jar -from .exceptions import ConnectionError, Timeout, SSLError, ProxyError +from .exceptions import (ConnectionError, ConnectTimeout, ReadTimeout, SSLError, + ProxyError) from .auth import _basic_auth_str DEFAULT_POOLBLOCK = False @@ -101,14 +105,17 @@ class HTTPAdapter(BaseAdapter): self.init_poolmanager(self._pool_connections, self._pool_maxsize, block=self._pool_block) - def init_poolmanager(self, connections, maxsize, block=DEFAULT_POOLBLOCK): - """Initializes a urllib3 PoolManager. This method should not be called - from user code, and is only exposed for use when subclassing the + def init_poolmanager(self, connections, maxsize, block=DEFAULT_POOLBLOCK, **pool_kwargs): + """Initializes a urllib3 PoolManager. 
+ + This method should not be called from user code, and is only + exposed for use when subclassing the :class:`HTTPAdapter <requests.adapters.HTTPAdapter>`. :param connections: The number of urllib3 connection pools to cache. :param maxsize: The maximum number of connections to save in the pool. :param block: Block when no free connections are available. + :param pool_kwargs: Extra keyword arguments used to initialize the Pool Manager. """ # save these values for pickling self._pool_connections = connections @@ -116,7 +123,30 @@ class HTTPAdapter(BaseAdapter): self._pool_block = block self.poolmanager = PoolManager(num_pools=connections, maxsize=maxsize, - block=block) + block=block, **pool_kwargs) + + def proxy_manager_for(self, proxy, **proxy_kwargs): + """Return urllib3 ProxyManager for the given proxy. + + This method should not be called from user code, and is only + exposed for use when subclassing the + :class:`HTTPAdapter <requests.adapters.HTTPAdapter>`. + + :param proxy: The proxy to return a urllib3 ProxyManager for. + :param proxy_kwargs: Extra keyword arguments used to configure the Proxy Manager. + :returns: ProxyManager + """ + if not proxy in self.proxy_manager: + proxy_headers = self.proxy_headers(proxy) + self.proxy_manager[proxy] = proxy_from_url( + proxy, + proxy_headers=proxy_headers, + num_pools=self._pool_connections, + maxsize=self._pool_maxsize, + block=self._pool_block, + **proxy_kwargs) + + return self.proxy_manager[proxy] def cert_verify(self, conn, url, verify, cert): """Verify a SSL certificate. 
This method should not be called from user @@ -204,17 +234,8 @@ class HTTPAdapter(BaseAdapter): if proxy: proxy = prepend_scheme_if_needed(proxy, 'http') - proxy_headers = self.proxy_headers(proxy) - - if not proxy in self.proxy_manager: - self.proxy_manager[proxy] = proxy_from_url( - proxy, - proxy_headers=proxy_headers, - num_pools=self._pool_connections, - maxsize=self._pool_maxsize, - block=self._pool_block) - - conn = self.proxy_manager[proxy].connection_from_url(url) + proxy_manager = self.proxy_manager_for(proxy) + conn = proxy_manager.connection_from_url(url) else: # Only scheme should be lower case parsed = urlparse(url) @@ -249,7 +270,7 @@ class HTTPAdapter(BaseAdapter): proxy = proxies.get(scheme) if proxy and scheme != 'https': - url, _ = urldefrag(request.url) + url = urldefragauth(request.url) else: url = request.path_url @@ -296,7 +317,10 @@ class HTTPAdapter(BaseAdapter): :param request: The :class:`PreparedRequest <PreparedRequest>` being sent. :param stream: (optional) Whether to stream the request content. - :param timeout: (optional) The timeout on the request. + :param timeout: (optional) How long to wait for the server to send + data before giving up, as a float, or a (`connect timeout, read + timeout <user/advanced.html#timeouts>`_) tuple. + :type timeout: float or tuple :param verify: (optional) Whether to verify SSL certificates. :param cert: (optional) Any user-provided SSL certificate to be trusted. :param proxies: (optional) The proxies dictionary to apply to the request. @@ -310,7 +334,18 @@ class HTTPAdapter(BaseAdapter): chunked = not (request.body is None or 'Content-Length' in request.headers) - timeout = TimeoutSauce(connect=timeout, read=timeout) + if isinstance(timeout, tuple): + try: + connect, read = timeout + timeout = TimeoutSauce(connect=connect, read=read) + except ValueError as e: + # this may raise a string formatting error. + err = ("Invalid timeout {0}. 
Pass a (connect, read) " + "timeout tuple, or a single float to set " + "both timeouts to the same value".format(timeout)) + raise ValueError(err) + else: + timeout = TimeoutSauce(connect=timeout, read=timeout) try: if not chunked: @@ -323,7 +358,7 @@ class HTTPAdapter(BaseAdapter): assert_same_host=False, preload_content=False, decode_content=False, - retries=self.max_retries, + retries=Retry(self.max_retries, read=False), timeout=timeout ) @@ -368,10 +403,13 @@ class HTTPAdapter(BaseAdapter): # All is well, return the connection to the pool. conn._put_conn(low_conn) - except socket.error as sockerr: - raise ConnectionError(sockerr, request=request) + except (ProtocolError, socket.error) as err: + raise ConnectionError(err, request=request) except MaxRetryError as e: + if isinstance(e.reason, ConnectTimeoutError): + raise ConnectTimeout(e, request=request) + raise ConnectionError(e, request=request) except _ProxyError as e: @@ -380,8 +418,8 @@ class HTTPAdapter(BaseAdapter): except (_SSLError, _HTTPError) as e: if isinstance(e, _SSLError): raise SSLError(e, request=request) - elif isinstance(e, TimeoutError): - raise Timeout(e, request=request) + elif isinstance(e, ReadTimeoutError): + raise ReadTimeout(e, request=request) else: raise diff --git a/requests/api.py b/requests/api.py index 01d853d..4eaaf9e 100644 --- a/requests/api.py +++ b/requests/api.py @@ -22,12 +22,17 @@ def request(method, url, **kwargs): :param url: URL for the new :class:`Request` object. :param params: (optional) Dictionary or bytes to be sent in the query string for the :class:`Request`. :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`. + :param json: (optional) json data to send in the body of the :class:`Request`. :param headers: (optional) Dictionary of HTTP Headers to send with the :class:`Request`. :param cookies: (optional) Dict or CookieJar object to send with the :class:`Request`. 
- :param files: (optional) Dictionary of 'name': file-like-objects (or {'name': ('filename', fileobj)}) for multipart encoding upload. + :param files: (optional) Dictionary of ``'name': file-like-objects`` (or ``{'name': ('filename', fileobj)}``) for multipart encoding upload. :param auth: (optional) Auth tuple to enable Basic/Digest/Custom HTTP Auth. - :param timeout: (optional) Float describing the timeout of the request in seconds. + :param timeout: (optional) How long to wait for the server to send data + before giving up, as a float, or a (`connect timeout, read timeout + <user/advanced.html#timeouts>`_) tuple. + :type timeout: float or tuple :param allow_redirects: (optional) Boolean. Set to True if POST/PUT/DELETE redirect following is allowed. + :type allow_redirects: bool :param proxies: (optional) Dictionary mapping protocol to the URL of the proxy. :param verify: (optional) if ``True``, the SSL cert will be verified. A CA_BUNDLE path can also be provided. :param stream: (optional) if ``False``, the response content will be immediately downloaded. @@ -77,15 +82,16 @@ def head(url, **kwargs): return request('head', url, **kwargs) -def post(url, data=None, **kwargs): +def post(url, data=None, json=None, **kwargs): """Sends a POST request. Returns :class:`Response` object. :param url: URL for the new :class:`Request` object. :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`. + :param json: (optional) json data to send in the body of the :class:`Request`. :param \*\*kwargs: Optional arguments that ``request`` takes. 
""" - return request('post', url, data=data, **kwargs) + return request('post', url, data=data, json=json, **kwargs) def put(url, data=None, **kwargs): diff --git a/requests/auth.py b/requests/auth.py index 9f831b7..9b6426d 100644 --- a/requests/auth.py +++ b/requests/auth.py @@ -16,7 +16,7 @@ from base64 import b64encode from .compat import urlparse, str from .cookies import extract_cookies_to_jar -from .utils import parse_dict_header +from .utils import parse_dict_header, to_native_string CONTENT_TYPE_FORM_URLENCODED = 'application/x-www-form-urlencoded' CONTENT_TYPE_MULTI_PART = 'multipart/form-data' @@ -25,7 +25,11 @@ CONTENT_TYPE_MULTI_PART = 'multipart/form-data' def _basic_auth_str(username, password): """Returns a Basic Auth string.""" - return 'Basic ' + b64encode(('%s:%s' % (username, password)).encode('latin1')).strip().decode('latin1') + authstr = 'Basic ' + to_native_string( + b64encode(('%s:%s' % (username, password)).encode('latin1')).strip() + ) + + return authstr class AuthBase(object): diff --git a/requests/certs.py b/requests/certs.py index bc00826..07e6475 100644 --- a/requests/certs.py +++ b/requests/certs.py @@ -11,14 +11,15 @@ If you are packaging Requests, e.g., for a Linux distribution or a managed environment, you can change the definition of where() to return a separately packaged CA bundle. 
""" - import os.path - -def where(): - """Return the preferred certificate bundle.""" - # vendored bundle inside Requests - return os.path.join(os.path.dirname(__file__), 'cacert.pem') +try: + from certifi import where +except ImportError: + def where(): + """Return the preferred certificate bundle.""" + # vendored bundle inside Requests + return os.path.join(os.path.dirname(__file__), 'cacert.pem') if __name__ == '__main__': print(where()) diff --git a/requests/compat.py b/requests/compat.py index bdf10d6..be5a1ed 100644 --- a/requests/compat.py +++ b/requests/compat.py @@ -75,7 +75,9 @@ is_solaris = ('solar==' in str(sys.platform).lower()) # Complete guess. try: import simplejson as json -except ImportError: +except (ImportError, SyntaxError): + # simplejson does not support Python 3.2, it thows a SyntaxError + # because of u'...' Unicode literals. import json # --------- @@ -90,7 +92,6 @@ if is_py2: from Cookie import Morsel from StringIO import StringIO from .packages.urllib3.packages.ordered_dict import OrderedDict - from httplib import IncompleteRead builtin_str = str bytes = str @@ -106,7 +107,6 @@ elif is_py3: from http.cookies import Morsel from io import StringIO from collections import OrderedDict - from http.client import IncompleteRead builtin_str = str str = str diff --git a/requests/exceptions.py b/requests/exceptions.py index a4ee9d6..34c7a0d 100644 --- a/requests/exceptions.py +++ b/requests/exceptions.py @@ -44,7 +44,23 @@ class SSLError(ConnectionError): class Timeout(RequestException): - """The request timed out.""" + """The request timed out. + + Catching this error will catch both + :exc:`~requests.exceptions.ConnectTimeout` and + :exc:`~requests.exceptions.ReadTimeout` errors. + """ + + +class ConnectTimeout(ConnectionError, Timeout): + """The request timed out while trying to connect to the remote server. + + Requests that produced this error are safe to retry. 
+ """ + + +class ReadTimeout(Timeout): + """The server did not send any data in the allotted amount of time.""" class URLRequired(RequestException): @@ -73,3 +89,6 @@ class ChunkedEncodingError(RequestException): class ContentDecodingError(RequestException, BaseHTTPError): """Failed to decode response content""" + +class StreamConsumedError(RequestException, TypeError): + """The content for this response was already consumed""" diff --git a/requests/models.py b/requests/models.py index 120968f..17e5598 100644 --- a/requests/models.py +++ b/requests/models.py @@ -19,31 +19,36 @@ from .cookies import cookiejar_from_dict, get_cookie_header from .packages.urllib3.fields import RequestField from .packages.urllib3.filepost import encode_multipart_formdata from .packages.urllib3.util import parse_url -from .packages.urllib3.exceptions import DecodeError +from .packages.urllib3.exceptions import ( + DecodeError, ReadTimeoutError, ProtocolError) from .exceptions import ( - HTTPError, RequestException, MissingSchema, InvalidURL, - ChunkedEncodingError, ContentDecodingError) + HTTPError, RequestException, MissingSchema, InvalidURL, + ChunkedEncodingError, ContentDecodingError, ConnectionError, + StreamConsumedError) from .utils import ( guess_filename, get_auth_from_url, requote_uri, stream_decode_response_unicode, to_key_val_list, parse_header_links, iter_slices, guess_json_utf, super_len, to_native_string) from .compat import ( cookielib, urlunparse, urlsplit, urlencode, str, bytes, StringIO, - is_py2, chardet, json, builtin_str, basestring, IncompleteRead) + is_py2, chardet, json, builtin_str, basestring) from .status_codes import codes #: The set of HTTP status codes that indicate an automatically #: processable redirect. 
REDIRECT_STATI = ( - codes.moved, # 301 - codes.found, # 302 - codes.other, # 303 - codes.temporary_moved, # 307 + codes.moved, # 301 + codes.found, # 302 + codes.other, # 303 + codes.temporary_redirect, # 307 + codes.permanent_redirect, # 308 ) DEFAULT_REDIRECT_LIMIT = 30 CONTENT_CHUNK_SIZE = 10 * 1024 ITER_CHUNK_SIZE = 512 +json_dumps = json.dumps + class RequestEncodingMixin(object): @property @@ -187,7 +192,8 @@ class Request(RequestHooksMixin): :param url: URL to send. :param headers: dictionary of headers to send. :param files: dictionary of {filename: fileobject} files to multipart upload. - :param data: the body to attach the request. If a dictionary is provided, form-encoding will take place. + :param data: the body to attach to the request. If a dictionary is provided, form-encoding will take place. + :param json: json for the body to attach to the request (if data is not specified). :param params: dictionary of URL parameters to append to the URL. :param auth: Auth handler or (user, pass) tuple. :param cookies: dictionary or CookieJar of cookies to attach to this request. @@ -210,7 +216,8 @@ class Request(RequestHooksMixin): params=None, auth=None, cookies=None, - hooks=None): + hooks=None, + json=None): # Default empty dicts for dict params. 
data = [] if data is None else data @@ -228,6 +235,7 @@ class Request(RequestHooksMixin): self.headers = headers self.files = files self.data = data + self.json = json self.params = params self.auth = auth self.cookies = cookies @@ -244,6 +252,7 @@ class Request(RequestHooksMixin): headers=self.headers, files=self.files, data=self.data, + json=self.json, params=self.params, auth=self.auth, cookies=self.cookies, @@ -287,14 +296,15 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): self.hooks = default_hooks() def prepare(self, method=None, url=None, headers=None, files=None, - data=None, params=None, auth=None, cookies=None, hooks=None): + data=None, params=None, auth=None, cookies=None, hooks=None, + json=None): """Prepares the entire request with the given parameters.""" self.prepare_method(method) self.prepare_url(url, params) self.prepare_headers(headers) self.prepare_cookies(cookies) - self.prepare_body(data, files) + self.prepare_body(data, files, json) self.prepare_auth(auth, url) # Note that prepare_auth must be last to enable authentication schemes # such as OAuth to work on a fully prepared request. @@ -309,8 +319,8 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): p = PreparedRequest() p.method = self.method p.url = self.url - p.headers = self.headers.copy() - p._cookies = self._cookies.copy() + p.headers = self.headers.copy() if self.headers is not None else None + p._cookies = self._cookies.copy() if self._cookies is not None else None p.body = self.body p.hooks = self.hooks return p @@ -324,15 +334,18 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): def prepare_url(self, url, params): """Prepares the given HTTP URL.""" #: Accept objects that have string representations. - try: - url = unicode(url) - except NameError: - # We're on Python 3. 
- url = str(url) - except UnicodeDecodeError: - pass - - # Don't do any URL preparation for oddball schemes + #: We're unable to blindy call unicode/str functions + #: as this will include the bytestring indicator (b'') + #: on python 3.x. + #: https://github.com/kennethreitz/requests/pull/2238 + if isinstance(url, bytes): + url = url.decode('utf8') + else: + url = unicode(url) if is_py2 else str(url) + + # Don't do any URL preparation for non-HTTP schemes like `mailto`, + # `data` etc to work around exceptions from `url_parse`, which + # handles RFC 3986 only. if ':' in url and not url.lower().startswith('http'): self.url = url return @@ -395,7 +408,7 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): else: self.headers = CaseInsensitiveDict() - def prepare_body(self, data, files): + def prepare_body(self, data, files, json=None): """Prepares the given HTTP body data.""" # Check if file, fo, generator, iterator. @@ -406,6 +419,10 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): content_type = None length = None + if json is not None: + content_type = 'application/json' + body = json_dumps(json) + is_stream = all([ hasattr(data, '__iter__'), not isinstance(data, (basestring, list, tuple, dict)) @@ -431,9 +448,9 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): if files: (body, content_type) = self._encode_files(files, data) else: - if data: + if data and json is None: body = self._encode_params(data) - if isinstance(data, str) or isinstance(data, builtin_str) or hasattr(data, 'read'): + if isinstance(data, basestring) or hasattr(data, 'read'): content_type = None else: content_type = 'application/x-www-form-urlencoded' @@ -441,7 +458,7 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): self.prepare_content_length(body) # Add content-type if it wasn't explicitly provided. 
- if (content_type) and (not 'content-type' in self.headers): + if content_type and ('content-type' not in self.headers): self.headers['Content-Type'] = content_type self.body = body @@ -556,6 +573,10 @@ class Response(object): #: and the arrival of the response (as a timedelta) self.elapsed = datetime.timedelta(0) + #: The :class:`PreparedRequest <PreparedRequest>` object to which this + #: is a response. + self.request = None + def __getstate__(self): # Consume everything; accessing the content attribute makes # sure the content has been fully read. @@ -606,6 +627,11 @@ class Response(object): return ('location' in self.headers and self.status_code in REDIRECT_STATI) @property + def is_permanent_redirect(self): + """True if this Response one of the permanant versions of redirect""" + return ('location' in self.headers and self.status_code in (codes.moved_permanently, codes.permanent_redirect)) + + @property def apparent_encoding(self): """The apparent encoding, provided by the chardet library""" return chardet.detect(self.content)['encoding'] @@ -626,10 +652,12 @@ class Response(object): try: for chunk in self.raw.stream(chunk_size, decode_content=True): yield chunk - except IncompleteRead as e: + except ProtocolError as e: raise ChunkedEncodingError(e) except DecodeError as e: raise ContentDecodingError(e) + except ReadTimeoutError as e: + raise ConnectionError(e) except AttributeError: # Standard file-like object. 
while True: @@ -640,6 +668,8 @@ class Response(object): self._content_consumed = True + if self._content_consumed and isinstance(self._content, bool): + raise StreamConsumedError() # simulate reading small chunks of the content reused_chunks = iter_slices(self._content, chunk_size) diff --git a/requests/packages/urllib3/__init__.py b/requests/packages/urllib3/__init__.py index 73071f7..4b36b5a 100644 --- a/requests/packages/urllib3/__init__.py +++ b/requests/packages/urllib3/__init__.py @@ -1,9 +1,3 @@ -# urllib3/__init__.py -# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) -# -# This module is part of urllib3 and is released under -# the MIT License: http://www.opensource.org/licenses/mit-license.php - """ urllib3 - Thread-safe connection pooling and re-using. """ @@ -23,7 +17,10 @@ from . import exceptions from .filepost import encode_multipart_formdata from .poolmanager import PoolManager, ProxyManager, proxy_from_url from .response import HTTPResponse -from .util import make_headers, get_host, Timeout +from .util.request import make_headers +from .util.url import get_host +from .util.timeout import Timeout +from .util.retry import Retry # Set default logging handler to avoid "No handler found" warnings. @@ -51,8 +48,19 @@ def add_stderr_logger(level=logging.DEBUG): handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s')) logger.addHandler(handler) logger.setLevel(level) - logger.debug('Added an stderr logging handler to logger: %s' % __name__) + logger.debug('Added a stderr logging handler to logger: %s' % __name__) return handler # ... Clean up. del NullHandler + + +# Set security warning to only go off once by default. +import warnings +warnings.simplefilter('module', exceptions.SecurityWarning) + +def disable_warnings(category=exceptions.HTTPWarning): + """ + Helper for quickly disabling all urllib3 warnings. 
+ """ + warnings.simplefilter('ignore', category) diff --git a/requests/packages/urllib3/_collections.py b/requests/packages/urllib3/_collections.py index 9cea3a4..d77ebb8 100644 --- a/requests/packages/urllib3/_collections.py +++ b/requests/packages/urllib3/_collections.py @@ -1,9 +1,3 @@ -# urllib3/_collections.py -# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) -# -# This module is part of urllib3 and is released under -# the MIT License: http://www.opensource.org/licenses/mit-license.php - from collections import Mapping, MutableMapping try: from threading import RLock @@ -116,7 +110,7 @@ class HTTPHeaderDict(MutableMapping): A ``dict`` like container for storing HTTP Headers. Field names are stored and compared case-insensitively in compliance with - RFC 2616. Iteration provides the first case-sensitive key seen for each + RFC 7230. Iteration provides the first case-sensitive key seen for each case-insensitive pair. Using ``__setitem__`` syntax overwrites fields that compare equal diff --git a/requests/packages/urllib3/connection.py b/requests/packages/urllib3/connection.py index 5feb332..c6e1959 100644 --- a/requests/packages/urllib3/connection.py +++ b/requests/packages/urllib3/connection.py @@ -1,95 +1,133 @@ -# urllib3/connection.py -# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) -# -# This module is part of urllib3 and is released under -# the MIT License: http://www.opensource.org/licenses/mit-license.php - +import datetime import sys import socket from socket import timeout as SocketTimeout +import warnings -try: # Python 3 +try: # Python 3 from http.client import HTTPConnection as _HTTPConnection, HTTPException except ImportError: from httplib import HTTPConnection as _HTTPConnection, HTTPException + class DummyConnection(object): "Used to detect a failed ConnectionCls import." pass -try: # Compiled with SSL? - ssl = None + +try: # Compiled with SSL? 
HTTPSConnection = DummyConnection + import ssl + BaseSSLError = ssl.SSLError +except (ImportError, AttributeError): # Platform-specific: No SSL. + ssl = None class BaseSSLError(BaseException): pass - try: # Python 3 - from http.client import HTTPSConnection as _HTTPSConnection - except ImportError: - from httplib import HTTPSConnection as _HTTPSConnection - - import ssl - BaseSSLError = ssl.SSLError - -except (ImportError, AttributeError): # Platform-specific: No SSL. - pass from .exceptions import ( ConnectTimeoutError, + SystemTimeWarning, ) from .packages.ssl_match_hostname import match_hostname from .packages import six -from .util import ( - assert_fingerprint, + +from .util.ssl_ import ( resolve_cert_reqs, resolve_ssl_version, ssl_wrap_socket, + assert_fingerprint, ) +from .util import connection + port_by_scheme = { 'http': 80, 'https': 443, } +RECENT_DATE = datetime.date(2014, 1, 1) + class HTTPConnection(_HTTPConnection, object): """ Based on httplib.HTTPConnection but provides an extra constructor backwards-compatibility layer between older and newer Pythons. + + Additional keyword parameters are used to configure attributes of the connection. + Accepted parameters include: + + - ``strict``: See the documentation on :class:`urllib3.connectionpool.HTTPConnectionPool` + - ``source_address``: Set the source address for the current connection. + + .. note:: This is ignored for Python 2.6. It is only applied for 2.7 and 3.x + + - ``socket_options``: Set specific options on the underlying socket. If not specified, then + defaults are loaded from ``HTTPConnection.default_socket_options`` which includes disabling + Nagle's algorithm (sets TCP_NODELAY to 1) unless the connection is behind a proxy. 
+ + For example, if you wish to enable TCP Keep Alive in addition to the defaults, + you might pass:: + + HTTPConnection.default_socket_options + [ + (socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1), + ] + + Or you may want to disable the defaults by passing an empty list (e.g., ``[]``). """ default_port = port_by_scheme['http'] - # By default, disable Nagle's Algorithm. - tcp_nodelay = 1 + #: Disable Nagle's algorithm by default. + #: ``[(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)]`` + default_socket_options = [(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)] + + #: Whether this connection verifies the host's certificate. + is_verified = False def __init__(self, *args, **kw): if six.PY3: # Python 3 kw.pop('strict', None) - if sys.version_info < (2, 7): # Python 2.6 and older - kw.pop('source_address', None) # Pre-set source_address in case we have an older Python like 2.6. self.source_address = kw.get('source_address') + if sys.version_info < (2, 7): # Python 2.6 + # _HTTPConnection on Python 2.6 will balk at this keyword arg, but + # not newer versions. We can still use it when creating a + # connection though, so we pop it *after* we have saved it as + # self.source_address. + kw.pop('source_address', None) + + #: The socket options provided by the user. If no options are + #: provided, we use the default options. + self.socket_options = kw.pop('socket_options', self.default_socket_options) + # Superclass also sets self.source_address in Python 2.7+. - _HTTPConnection.__init__(self, *args, **kw) + _HTTPConnection.__init__(self, *args, **kw) def _new_conn(self): """ Establish a socket connection and set nodelay settings on it. - :return: a new socket connection + :return: New socket connection. 
""" - extra_args = [] - if self.source_address: # Python 2.7+ - extra_args.append(self.source_address) + extra_kw = {} + if self.source_address: + extra_kw['source_address'] = self.source_address - conn = socket.create_connection( - (self.host, self.port), self.timeout, *extra_args) - conn.setsockopt( - socket.IPPROTO_TCP, socket.TCP_NODELAY, self.tcp_nodelay) + if self.socket_options: + extra_kw['socket_options'] = self.socket_options + + try: + conn = connection.create_connection( + (self.host, self.port), self.timeout, **extra_kw) + + except SocketTimeout: + raise ConnectTimeoutError( + self, "Connection to %s timed out. (connect timeout=%s)" % + (self.host, self.timeout)) return conn @@ -101,6 +139,8 @@ class HTTPConnection(_HTTPConnection, object): if getattr(self, '_tunnel_host', None): # TODO: Fix tunnel so it doesn't depend on self.sock state. self._tunnel() + # Mark this connection as not reusable + self.auto_open = 0 def connect(self): conn = self._new_conn() @@ -137,7 +177,7 @@ class VerifiedHTTPSConnection(HTTPSConnection): cert_reqs = None ca_certs = None ssl_version = None - conn_kw = {} + assert_fingerprint = None def set_cert(self, key_file=None, cert_file=None, cert_reqs=None, ca_certs=None, @@ -152,18 +192,7 @@ class VerifiedHTTPSConnection(HTTPSConnection): def connect(self): # Add certificate verification - - try: - sock = socket.create_connection( - address=(self.host, self.port), timeout=self.timeout, - **self.conn_kw) - except SocketTimeout: - raise ConnectTimeoutError( - self, "Connection to %s timed out. 
(connect timeout=%s)" % - (self.host, self.timeout)) - - sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, - self.tcp_nodelay) + conn = self._new_conn() resolved_cert_reqs = resolve_cert_reqs(self.cert_reqs) resolved_ssl_version = resolve_ssl_version(self.ssl_version) @@ -173,29 +202,42 @@ class VerifiedHTTPSConnection(HTTPSConnection): # _tunnel_host was added in Python 2.6.3 # (See: http://hg.python.org/cpython/rev/0f57b30a152f) - self.sock = sock + self.sock = conn # Calls self._set_hostport(), so self.host is # self._tunnel_host below. self._tunnel() + # Mark this connection as not reusable + self.auto_open = 0 # Override the host with the one we're requesting data from. hostname = self._tunnel_host + is_time_off = datetime.date.today() < RECENT_DATE + if is_time_off: + warnings.warn(( + 'System time is way off (before {0}). This will probably ' + 'lead to SSL verification errors').format(RECENT_DATE), + SystemTimeWarning + ) + # Wrap socket using verification with the root certs in # trusted_root_certs - self.sock = ssl_wrap_socket(sock, self.key_file, self.cert_file, + self.sock = ssl_wrap_socket(conn, self.key_file, self.cert_file, cert_reqs=resolved_cert_reqs, ca_certs=self.ca_certs, server_hostname=hostname, ssl_version=resolved_ssl_version) - if resolved_cert_reqs != ssl.CERT_NONE: - if self.assert_fingerprint: - assert_fingerprint(self.sock.getpeercert(binary_form=True), - self.assert_fingerprint) - elif self.assert_hostname is not False: - match_hostname(self.sock.getpeercert(), - self.assert_hostname or hostname) + if self.assert_fingerprint: + assert_fingerprint(self.sock.getpeercert(binary_form=True), + self.assert_fingerprint) + elif resolved_cert_reqs != ssl.CERT_NONE \ + and self.assert_hostname is not False: + match_hostname(self.sock.getpeercert(), + self.assert_hostname or hostname) + + self.is_verified = (resolved_cert_reqs == ssl.CERT_REQUIRED + or self.assert_fingerprint is not None) if ssl: diff --git 
a/requests/packages/urllib3/connectionpool.py b/requests/packages/urllib3/connectionpool.py index 95a53a7..9cc2a95 100644 --- a/requests/packages/urllib3/connectionpool.py +++ b/requests/packages/urllib3/connectionpool.py @@ -1,17 +1,12 @@ -# urllib3/connectionpool.py -# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) -# -# This module is part of urllib3 and is released under -# the MIT License: http://www.opensource.org/licenses/mit-license.php - -import sys import errno import logging +import sys +import warnings from socket import error as SocketError, timeout as SocketTimeout import socket -try: # Python 3 +try: # Python 3 from queue import LifoQueue, Empty, Full except ImportError: from Queue import LifoQueue, Empty, Full @@ -20,16 +15,16 @@ except ImportError: from .exceptions import ( ClosedPoolError, - ConnectionError, - ConnectTimeoutError, + ProtocolError, EmptyPoolError, HostChangedError, - LocationParseError, + LocationValueError, MaxRetryError, + ProxyError, + ReadTimeoutError, SSLError, TimeoutError, - ReadTimeoutError, - ProxyError, + InsecureRequestWarning, ) from .packages.ssl_match_hostname import CertificateError from .packages import six @@ -41,11 +36,11 @@ from .connection import ( ) from .request import RequestMethods from .response import HTTPResponse -from .util import ( - get_host, - is_connection_dropped, - Timeout, -) + +from .util.connection import is_connection_dropped +from .util.retry import Retry +from .util.timeout import Timeout +from .util.url import get_host xrange = six.moves.xrange @@ -54,8 +49,8 @@ log = logging.getLogger(__name__) _Default = object() -## Pool objects +## Pool objects class ConnectionPool(object): """ Base class for all connection pools, such as @@ -66,13 +61,11 @@ class ConnectionPool(object): QueueCls = LifoQueue def __init__(self, host, port=None): - if host is None: - raise LocationParseError(host) + if not host: + raise LocationValueError("No host specified.") # httplib doesn't 
like it when we include brackets in ipv6 addresses - host = host.strip('[]') - - self.host = host + self.host = host.strip('[]') self.port = port def __str__(self): @@ -82,6 +75,7 @@ class ConnectionPool(object): # This is taken from http://hg.python.org/cpython/file/7aaba721ebc0/Lib/socket.py#l252 _blocking_errnos = set([errno.EAGAIN, errno.EWOULDBLOCK]) + class HTTPConnectionPool(ConnectionPool, RequestMethods): """ Thread-safe connection pool for one host. @@ -126,6 +120,9 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): Headers to include with all requests, unless other headers are given explicitly. + :param retries: + Retry configuration to use by default with requests in this pool. + :param _proxy: Parsed proxy URL, should not be used directly, instead, see :class:`urllib3.connectionpool.ProxyManager`" @@ -133,6 +130,10 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): :param _proxy_headers: A dictionary with proxy headers, should not be used directly, instead, see :class:`urllib3.connectionpool.ProxyManager`" + + :param \**conn_kw: + Additional parameters are used to create fresh :class:`urllib3.connection.HTTPConnection`, + :class:`urllib3.connection.HTTPSConnection` instances. 
""" scheme = 'http' @@ -140,18 +141,22 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): def __init__(self, host, port=None, strict=False, timeout=Timeout.DEFAULT_TIMEOUT, maxsize=1, block=False, - headers=None, _proxy=None, _proxy_headers=None, **conn_kw): + headers=None, retries=None, + _proxy=None, _proxy_headers=None, + **conn_kw): ConnectionPool.__init__(self, host, port) RequestMethods.__init__(self, headers) self.strict = strict - # This is for backwards compatibility and can be removed once a timeout - # can only be set to a Timeout object if not isinstance(timeout, Timeout): timeout = Timeout.from_float(timeout) + if retries is None: + retries = Retry.DEFAULT + self.timeout = timeout + self.retries = retries self.pool = self.QueueCls(maxsize) self.block = block @@ -166,11 +171,14 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): # These are mostly for testing and debugging purposes. self.num_connections = 0 self.num_requests = 0 - - if sys.version_info < (2, 7): # Python 2.6 and older - conn_kw.pop('source_address', None) self.conn_kw = conn_kw + if self.proxy: + # Enable Nagle's algorithm for proxies, to avoid packet fragmentation. + # We cannot know if the user has added default socket options, so we cannot replace the + # list. + self.conn_kw.setdefault('socket_options', []) + def _new_conn(self): """ Return a fresh :class:`HTTPConnection`. @@ -182,10 +190,6 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): conn = self.ConnectionCls(host=self.host, port=self.port, timeout=self.timeout.connect_timeout, strict=self.strict, **self.conn_kw) - if self.proxy is not None: - # Enable Nagle's algorithm for proxies, to avoid packet - # fragmentation. 
- conn.tcp_nodelay = 0 return conn def _get_conn(self, timeout=None): @@ -204,7 +208,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): try: conn = self.pool.get(block=self.block, timeout=timeout) - except AttributeError: # self.pool is None + except AttributeError: # self.pool is None raise ClosedPoolError(self, "Pool is closed.") except Empty: @@ -218,6 +222,11 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): if conn and is_connection_dropped(conn): log.info("Resetting dropped connection: %s" % self.host) conn.close() + if getattr(conn, 'auto_open', 1) == 0: + # This is a proxied connection that has been mutated by + # httplib._tunnel() and cannot be reused (since it would + # attempt to bypass the proxy) + conn = None return conn or self._new_conn() @@ -237,7 +246,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): """ try: self.pool.put(conn, block=False) - return # Everything is dandy, done. + return # Everything is dandy, done. except AttributeError: # self.pool is None. pass @@ -251,6 +260,12 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): if conn: conn.close() + def _validate_conn(self, conn): + """ + Called right before a request is made, after the socket is created. + """ + pass + def _get_timeout(self, timeout): """ Helper that always returns a :class:`urllib3.util.Timeout` """ if timeout is _Default: @@ -282,23 +297,21 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): self.num_requests += 1 timeout_obj = self._get_timeout(timeout) + timeout_obj.start_connect() + conn.timeout = timeout_obj.connect_timeout - try: - timeout_obj.start_connect() - conn.timeout = timeout_obj.connect_timeout - # conn.request() calls httplib.*.request, not the method in - # urllib3.request. It also calls makefile (recv) on the socket. - conn.request(method, url, **httplib_request_kw) - except SocketTimeout: - raise ConnectTimeoutError( - self, "Connection to %s timed out. 
(connect timeout=%s)" % - (self.host, timeout_obj.connect_timeout)) + # Trigger any extra validation we need to do. + self._validate_conn(conn) + + # conn.request() calls httplib.*.request, not the method in + # urllib3.request. It also calls makefile (recv) on the socket. + conn.request(method, url, **httplib_request_kw) # Reset the timeout for the recv() on the socket read_timeout = timeout_obj.read_timeout # App Engine doesn't have a sock attr - if hasattr(conn, 'sock'): + if getattr(conn, 'sock', None): # In Python 3 socket.py will catch EAGAIN and return None when you # try and read into the file pointer created by http.client, which # instead raises a BadStatusLine exception. Instead of catching @@ -306,18 +319,17 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): # timeouts, check for a zero timeout before making the request. if read_timeout == 0: raise ReadTimeoutError( - self, url, - "Read timed out. (read timeout=%s)" % read_timeout) + self, url, "Read timed out. (read timeout=%s)" % read_timeout) if read_timeout is Timeout.DEFAULT_TIMEOUT: conn.sock.settimeout(socket.getdefaulttimeout()) - else: # None or a value + else: # None or a value conn.sock.settimeout(read_timeout) # Receive the response from the server try: - try: # Python 2.7+, use buffering of HTTP responses + try: # Python 2.7+, use buffering of HTTP responses httplib_response = conn.getresponse(buffering=True) - except TypeError: # Python 2.6 and older + except TypeError: # Python 2.6 and older httplib_response = conn.getresponse() except SocketTimeout: raise ReadTimeoutError( @@ -329,17 +341,17 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): # http://bugs.python.org/issue10272 if 'timed out' in str(e) or \ 'did not complete (read)' in str(e): # Python 2.6 - raise ReadTimeoutError(self, url, "Read timed out.") + raise ReadTimeoutError( + self, url, "Read timed out. 
(read timeout=%s)" % read_timeout) raise - except SocketError as e: # Platform-specific: Python 2 + except SocketError as e: # Platform-specific: Python 2 # See the above comment about EAGAIN in Python 3. In Python 2 we # have to specifically catch it and throw the timeout error if e.errno in _blocking_errnos: raise ReadTimeoutError( - self, url, - "Read timed out. (read timeout=%s)" % read_timeout) + self, url, "Read timed out. (read timeout=%s)" % read_timeout) raise @@ -364,7 +376,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): conn.close() except Empty: - pass # Done. + pass # Done. def is_same_host(self, url): """ @@ -385,7 +397,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): return (scheme, host, port) == (self.scheme, self.host, self.port) - def urlopen(self, method, url, body=None, headers=None, retries=3, + def urlopen(self, method, url, body=None, headers=None, retries=None, redirect=True, assert_same_host=True, timeout=_Default, pool_timeout=None, release_conn=None, **response_kw): """ @@ -419,9 +431,20 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): these headers completely replace any pool-specific headers. :param retries: - Number of retries to allow before raising a MaxRetryError exception. - If `False`, then retries are disabled and any exception is raised - immediately. + Configure the number of retries to allow before raising a + :class:`~urllib3.exceptions.MaxRetryError` exception. + + Pass ``None`` to retry until you receive a response. Pass a + :class:`~urllib3.util.retry.Retry` object for fine-grained control + over different types of retries. + Pass an integer number to retry connection errors that many times, + but no other types of errors. Pass zero to never retry. + + If ``False``, then retries are disabled and any exception is raised + immediately. Also, instead of raising a MaxRetryError on redirects, + the redirect response will be returned. 
+ + :type retries: :class:`~urllib3.util.retry.Retry`, False, or an int. :param redirect: If True, automatically handle redirects (status codes 301, 302, @@ -460,15 +483,15 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): if headers is None: headers = self.headers - if retries < 0 and retries is not False: - raise MaxRetryError(self, url) + if not isinstance(retries, Retry): + retries = Retry.from_int(retries, redirect=redirect, default=self.retries) if release_conn is None: release_conn = response_kw.get('preload_content', True) # Check host if assert_same_host and not self.is_same_host(url): - raise HostChangedError(self, url, retries - 1) + raise HostChangedError(self, url, retries) conn = None @@ -484,10 +507,10 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): err = None try: - # Request a connection from the queue + # Request a connection from the queue. conn = self._get_conn(timeout=pool_timeout) - # Make the request on the httplib connection object + # Make the request on the httplib connection object. httplib_response = self._make_request(conn, method, url, timeout=timeout, body=body, headers=headers) @@ -526,21 +549,15 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): conn.close() conn = None - if not retries: - if isinstance(e, TimeoutError): - # TimeoutError is exempt from MaxRetryError-wrapping. - # FIXME: ... Not sure why. Add a reason here. - raise - - # Wrap unexpected exceptions with the most appropriate - # module-level exception and re-raise. 
- if isinstance(e, SocketError) and self.proxy: - raise ProxyError('Cannot connect to proxy.', e) + stacktrace = sys.exc_info()[2] + if isinstance(e, SocketError) and self.proxy: + e = ProxyError('Cannot connect to proxy.', e) + elif isinstance(e, (SocketError, HTTPException)): + e = ProtocolError('Connection aborted.', e) - if retries is False: - raise ConnectionError('Connection failed.', e) - - raise MaxRetryError(self, url, e) + retries = retries.increment(method, url, error=e, + _pool=self, _stacktrace=stacktrace) + retries.sleep() # Keep track of the error for the retry warning. err = e @@ -554,23 +571,43 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): if not conn: # Try again - log.warning("Retrying (%d attempts remain) after connection " + log.warning("Retrying (%r) after connection " "broken by '%r': %s" % (retries, err, url)) - return self.urlopen(method, url, body, headers, retries - 1, + return self.urlopen(method, url, body, headers, retries, redirect, assert_same_host, timeout=timeout, pool_timeout=pool_timeout, release_conn=release_conn, **response_kw) # Handle redirect? redirect_location = redirect and response.get_redirect_location() - if redirect_location and retries is not False: + if redirect_location: if response.status == 303: method = 'GET' + + try: + retries = retries.increment(method, url, response=response, _pool=self) + except MaxRetryError: + if retries.raise_on_redirect: + raise + return response + log.info("Redirecting %s -> %s" % (url, redirect_location)) return self.urlopen(method, redirect_location, body, headers, - retries - 1, redirect, assert_same_host, - timeout=timeout, pool_timeout=pool_timeout, - release_conn=release_conn, **response_kw) + retries=retries, redirect=redirect, + assert_same_host=assert_same_host, + timeout=timeout, pool_timeout=pool_timeout, + release_conn=release_conn, **response_kw) + + # Check if we should retry the HTTP response. 
+ if retries.is_forced_retry(method, status_code=response.status): + retries = retries.increment(method, url, response=response, _pool=self) + retries.sleep() + log.info("Forced retry: %s" % url) + return self.urlopen(method, url, body, headers, + retries=retries, redirect=redirect, + assert_same_host=assert_same_host, + timeout=timeout, pool_timeout=pool_timeout, + release_conn=release_conn, **response_kw) return response @@ -597,19 +634,17 @@ class HTTPSConnectionPool(HTTPConnectionPool): ConnectionCls = HTTPSConnection def __init__(self, host, port=None, - strict=False, timeout=None, maxsize=1, - block=False, headers=None, + strict=False, timeout=Timeout.DEFAULT_TIMEOUT, maxsize=1, + block=False, headers=None, retries=None, _proxy=None, _proxy_headers=None, key_file=None, cert_file=None, cert_reqs=None, ca_certs=None, ssl_version=None, assert_hostname=None, assert_fingerprint=None, **conn_kw): - if sys.version_info < (2, 7): # Python 2.6 or older - conn_kw.pop('source_address', None) - HTTPConnectionPool.__init__(self, host, port, strict, timeout, maxsize, - block, headers, _proxy, _proxy_headers, **conn_kw) + block, headers, retries, _proxy, _proxy_headers, + **conn_kw) self.key_file = key_file self.cert_file = cert_file self.cert_reqs = cert_reqs @@ -617,7 +652,6 @@ class HTTPSConnectionPool(HTTPConnectionPool): self.ssl_version = ssl_version self.assert_hostname = assert_hostname self.assert_fingerprint = assert_fingerprint - self.conn_kw = conn_kw def _prepare_conn(self, conn): """ @@ -633,7 +667,6 @@ class HTTPSConnectionPool(HTTPConnectionPool): assert_hostname=self.assert_hostname, assert_fingerprint=self.assert_fingerprint) conn.ssl_version = self.ssl_version - conn.conn_kw = self.conn_kw if self.proxy is not None: # Python 2.7+ @@ -641,7 +674,12 @@ class HTTPSConnectionPool(HTTPConnectionPool): set_tunnel = conn.set_tunnel except AttributeError: # Platform-specific: Python 2.6 set_tunnel = conn._set_tunnel - set_tunnel(self.host, self.port, 
self.proxy_headers) + + if sys.version_info <= (2, 6, 4) and not self.proxy_headers: # Python 2.6.4 and older + set_tunnel(self.host, self.port) + else: + set_tunnel(self.host, self.port, self.proxy_headers) + # Establish tunnel connection early, because otherwise httplib # would improperly set Host: header to proxy's IP:port. conn.connect() @@ -667,21 +705,30 @@ class HTTPSConnectionPool(HTTPConnectionPool): actual_host = self.proxy.host actual_port = self.proxy.port - extra_params = {} - if not six.PY3: # Python 2 - extra_params['strict'] = self.strict - extra_params.update(self.conn_kw) - conn = self.ConnectionCls(host=actual_host, port=actual_port, timeout=self.timeout.connect_timeout, - **extra_params) - if self.proxy is not None: - # Enable Nagle's algorithm for proxies, to avoid packet - # fragmentation. - conn.tcp_nodelay = 0 + strict=self.strict, **self.conn_kw) return self._prepare_conn(conn) + def _validate_conn(self, conn): + """ + Called right before a request is made, after the socket is created. + """ + super(HTTPSConnectionPool, self)._validate_conn(conn) + + # Force connect early to allow us to validate the connection. + if not getattr(conn, 'sock', None): # AppEngine might not have `.sock` + conn.connect() + + if not conn.is_verified: + warnings.warn(( + 'Unverified HTTPS request is being made. ' + 'Adding certificate verification is strongly advised. See: ' + 'https://urllib3.readthedocs.org/en/latest/security.html ' + '(This warning will only appear once by default.)'), + InsecureRequestWarning) + def connection_from_url(url, **kw): """ @@ -698,7 +745,7 @@ def connection_from_url(url, **kw): :class:`.ConnectionPool`. Useful for specifying things like timeout, maxsize, headers, etc. 
- Example: :: + Example:: >>> conn = connection_from_url('http://google.com/') >>> r = conn.request('GET', '/') diff --git a/requests/packages/urllib3/contrib/ntlmpool.py b/requests/packages/urllib3/contrib/ntlmpool.py index b8cd933..c6b266f 100644 --- a/requests/packages/urllib3/contrib/ntlmpool.py +++ b/requests/packages/urllib3/contrib/ntlmpool.py @@ -1,9 +1,3 @@ -# urllib3/contrib/ntlmpool.py -# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) -# -# This module is part of urllib3 and is released under -# the MIT License: http://www.opensource.org/licenses/mit-license.php - """ NTLM authenticating pool, contributed by erikcederstran diff --git a/requests/packages/urllib3/contrib/pyopenssl.py b/requests/packages/urllib3/contrib/pyopenssl.py index 21a12c6..24de9e4 100644 --- a/requests/packages/urllib3/contrib/pyopenssl.py +++ b/requests/packages/urllib3/contrib/pyopenssl.py @@ -46,15 +46,18 @@ Module Variables ''' -from ndg.httpsclient.ssl_peer_verification import SUBJ_ALT_NAME_SUPPORT -from ndg.httpsclient.subj_alt_name import SubjectAltName as BaseSubjectAltName +try: + from ndg.httpsclient.ssl_peer_verification import SUBJ_ALT_NAME_SUPPORT + from ndg.httpsclient.subj_alt_name import SubjectAltName as BaseSubjectAltName +except SyntaxError as e: + raise ImportError(e) + import OpenSSL.SSL from pyasn1.codec.der import decoder as der_decoder from pyasn1.type import univ, constraint from socket import _fileobject, timeout import ssl import select -from cStringIO import StringIO from .. import connection from .. import util @@ -155,196 +158,43 @@ def get_subj_alt_name(peer_cert): return dns_name -class fileobject(_fileobject): - - def _wait_for_sock(self): - rd, wd, ed = select.select([self._sock], [], [], - self._sock.gettimeout()) - if not rd: - raise timeout() - - - def read(self, size=-1): - # Use max, disallow tiny reads in a loop as they are very inefficient. 
- # We never leave read() with any leftover data from a new recv() call - # in our internal buffer. - rbufsize = max(self._rbufsize, self.default_bufsize) - # Our use of StringIO rather than lists of string objects returned by - # recv() minimizes memory usage and fragmentation that occurs when - # rbufsize is large compared to the typical return value of recv(). - buf = self._rbuf - buf.seek(0, 2) # seek end - if size < 0: - # Read until EOF - self._rbuf = StringIO() # reset _rbuf. we consume it via buf. - while True: - try: - data = self._sock.recv(rbufsize) - except OpenSSL.SSL.WantReadError: - self._wait_for_sock() - continue - if not data: - break - buf.write(data) - return buf.getvalue() - else: - # Read until size bytes or EOF seen, whichever comes first - buf_len = buf.tell() - if buf_len >= size: - # Already have size bytes in our buffer? Extract and return. - buf.seek(0) - rv = buf.read(size) - self._rbuf = StringIO() - self._rbuf.write(buf.read()) - return rv - - self._rbuf = StringIO() # reset _rbuf. we consume it via buf. - while True: - left = size - buf_len - # recv() will malloc the amount of memory given as its - # parameter even though it often returns much less data - # than that. The returned data string is short lived - # as we copy it into a StringIO and free it. This avoids - # fragmentation issues on many platforms. - try: - data = self._sock.recv(left) - except OpenSSL.SSL.WantReadError: - self._wait_for_sock() - continue - if not data: - break - n = len(data) - if n == size and not buf_len: - # Shortcut. Avoid buffer data copies when: - # - We have no data in our buffer. - # AND - # - Our call to recv returned exactly the - # number of bytes we were asked to read. 
- return data - if n == left: - buf.write(data) - del data # explicit free - break - assert n <= left, "recv(%d) returned %d bytes" % (left, n) - buf.write(data) - buf_len += n - del data # explicit free - #assert buf_len == buf.tell() - return buf.getvalue() - - def readline(self, size=-1): - buf = self._rbuf - buf.seek(0, 2) # seek end - if buf.tell() > 0: - # check if we already have it in our buffer - buf.seek(0) - bline = buf.readline(size) - if bline.endswith('\n') or len(bline) == size: - self._rbuf = StringIO() - self._rbuf.write(buf.read()) - return bline - del bline - if size < 0: - # Read until \n or EOF, whichever comes first - if self._rbufsize <= 1: - # Speed up unbuffered case - buf.seek(0) - buffers = [buf.read()] - self._rbuf = StringIO() # reset _rbuf. we consume it via buf. - data = None - recv = self._sock.recv - while True: - try: - while data != "\n": - data = recv(1) - if not data: - break - buffers.append(data) - except OpenSSL.SSL.WantReadError: - self._wait_for_sock() - continue - break - return "".join(buffers) - - buf.seek(0, 2) # seek end - self._rbuf = StringIO() # reset _rbuf. we consume it via buf. - while True: - try: - data = self._sock.recv(self._rbufsize) - except OpenSSL.SSL.WantReadError: - self._wait_for_sock() - continue - if not data: - break - nl = data.find('\n') - if nl >= 0: - nl += 1 - buf.write(data[:nl]) - self._rbuf.write(data[nl:]) - del data - break - buf.write(data) - return buf.getvalue() - else: - # Read until size bytes or \n or EOF seen, whichever comes first - buf.seek(0, 2) # seek end - buf_len = buf.tell() - if buf_len >= size: - buf.seek(0) - rv = buf.read(size) - self._rbuf = StringIO() - self._rbuf.write(buf.read()) - return rv - self._rbuf = StringIO() # reset _rbuf. we consume it via buf. - while True: - try: - data = self._sock.recv(self._rbufsize) - except OpenSSL.SSL.WantReadError: - self._wait_for_sock() - continue - if not data: - break - left = size - buf_len - # did we just receive a newline? 
- nl = data.find('\n', 0, left) - if nl >= 0: - nl += 1 - # save the excess data to _rbuf - self._rbuf.write(data[nl:]) - if buf_len: - buf.write(data[:nl]) - break - else: - # Shortcut. Avoid data copy through buf when returning - # a substring of our first recv(). - return data[:nl] - n = len(data) - if n == size and not buf_len: - # Shortcut. Avoid data copy through buf when - # returning exactly all of our first recv(). - return data - if n >= left: - buf.write(data[:left]) - self._rbuf.write(data[left:]) - break - buf.write(data) - buf_len += n - #assert buf_len == buf.tell() - return buf.getvalue() - - class WrappedSocket(object): - '''API-compatibility wrapper for Python OpenSSL's Connection-class.''' + '''API-compatibility wrapper for Python OpenSSL's Connection-class. - def __init__(self, connection, socket): + Note: _makefile_refs, _drop() and _reuse() are needed for the garbage + collector of pypy. + ''' + + def __init__(self, connection, socket, suppress_ragged_eofs=True): self.connection = connection self.socket = socket + self.suppress_ragged_eofs = suppress_ragged_eofs + self._makefile_refs = 0 def fileno(self): return self.socket.fileno() def makefile(self, mode, bufsize=-1): - return fileobject(self.connection, mode, bufsize) + self._makefile_refs += 1 + return _fileobject(self, mode, bufsize, close=True) + + def recv(self, *args, **kwargs): + try: + data = self.connection.recv(*args, **kwargs) + except OpenSSL.SSL.SysCallError as e: + if self.suppress_ragged_eofs and e.args == (-1, 'Unexpected EOF'): + return b'' + else: + raise + except OpenSSL.SSL.WantReadError: + rd, wd, ed = select.select( + [self.socket], [], [], self.socket.gettimeout()) + if not rd: + raise timeout('The read operation timed out') + else: + return self.recv(*args, **kwargs) + else: + return data def settimeout(self, timeout): return self.socket.settimeout(timeout) @@ -353,7 +203,10 @@ class WrappedSocket(object): return self.connection.sendall(data) def close(self): - return 
self.connection.shutdown() + if self._makefile_refs < 1: + return self.connection.shutdown() + else: + self._makefile_refs -= 1 def getpeercert(self, binary_form=False): x509 = self.connection.get_peer_certificate() @@ -376,6 +229,15 @@ class WrappedSocket(object): ] } + def _reuse(self): + self._makefile_refs += 1 + + def _drop(self): + if self._makefile_refs < 1: + self.close() + else: + self._makefile_refs -= 1 + def _verify_callback(cnx, x509, err_no, err_depth, return_code): return err_no == 0 diff --git a/requests/packages/urllib3/exceptions.py b/requests/packages/urllib3/exceptions.py index b4df831..7519ba9 100644 --- a/requests/packages/urllib3/exceptions.py +++ b/requests/packages/urllib3/exceptions.py @@ -1,9 +1,3 @@ -# urllib3/exceptions.py -# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) -# -# This module is part of urllib3 and is released under -# the MIT License: http://www.opensource.org/licenses/mit-license.php - ## Base Exceptions @@ -11,6 +5,11 @@ class HTTPError(Exception): "Base exception used by this module." pass +class HTTPWarning(Warning): + "Base warning used by this module." + pass + + class PoolError(HTTPError): "Base exception for errors caused within a pool." @@ -44,27 +43,38 @@ class ProxyError(HTTPError): pass -class ConnectionError(HTTPError): - "Raised when a normal connection fails." +class DecodeError(HTTPError): + "Raised when automatic decoding based on Content-Type fails." pass -class DecodeError(HTTPError): - "Raised when automatic decoding based on Content-Type fails." +class ProtocolError(HTTPError): + "Raised when something unexpected happens mid-request/response." pass +#: Renamed to ProtocolError but aliased for backwards compatibility. +ConnectionError = ProtocolError + + ## Leaf Exceptions class MaxRetryError(RequestError): - "Raised when the maximum number of retries is exceeded." + """Raised when the maximum number of retries is exceeded. 
+ + :param pool: The connection pool + :type pool: :class:`~urllib3.connectionpool.HTTPConnectionPool` + :param string url: The requested Url + :param exceptions.Exception reason: The underlying error + + """ def __init__(self, pool, url, reason=None): self.reason = reason message = "Max retries exceeded with url: %s" % url if reason: - message += " (Caused by %s: %s)" % (type(reason), reason) + message += " (Caused by %r)" % reason else: message += " (Caused by redirect)" @@ -116,7 +126,12 @@ class ClosedPoolError(PoolError): pass -class LocationParseError(ValueError, HTTPError): +class LocationValueError(ValueError, HTTPError): + "Raised when there is something wrong with a given URL input." + pass + + +class LocationParseError(LocationValueError): "Raised when get_host or similar fails to parse the URL input." def __init__(self, location): @@ -124,3 +139,18 @@ class LocationParseError(ValueError, HTTPError): HTTPError.__init__(self, message) self.location = location + + +class SecurityWarning(HTTPWarning): + "Warned when perfoming security reducing actions" + pass + + +class InsecureRequestWarning(SecurityWarning): + "Warned when making an unverified HTTPS request." + pass + + +class SystemTimeWarning(SecurityWarning): + "Warned when system time is suspected to be wrong" + pass diff --git a/requests/packages/urllib3/fields.py b/requests/packages/urllib3/fields.py index da79e92..c853f8d 100644 --- a/requests/packages/urllib3/fields.py +++ b/requests/packages/urllib3/fields.py @@ -1,9 +1,3 @@ -# urllib3/fields.py -# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) -# -# This module is part of urllib3 and is released under -# the MIT License: http://www.opensource.org/licenses/mit-license.php - import email.utils import mimetypes @@ -78,9 +72,10 @@ class RequestField(object): """ A :class:`~urllib3.fields.RequestField` factory from old-style tuple parameters. 
- Supports constructing :class:`~urllib3.fields.RequestField` from parameter - of key/value strings AND key/filetuple. A filetuple is a (filename, data, MIME type) - tuple where the MIME type is optional. For example: :: + Supports constructing :class:`~urllib3.fields.RequestField` from + parameter of key/value strings AND key/filetuple. A filetuple is a + (filename, data, MIME type) tuple where the MIME type is optional. + For example:: 'foo': 'bar', 'fakefile': ('foofile.txt', 'contents of foofile'), @@ -125,8 +120,8 @@ class RequestField(object): 'Content-Disposition' fields. :param header_parts: - A sequence of (k, v) typles or a :class:`dict` of (k, v) to format as - `k1="v1"; k2="v2"; ...`. + A sequence of (k, v) typles or a :class:`dict` of (k, v) to format + as `k1="v1"; k2="v2"; ...`. """ parts = [] iterable = header_parts @@ -158,7 +153,8 @@ class RequestField(object): lines.append('\r\n') return '\r\n'.join(lines) - def make_multipart(self, content_disposition=None, content_type=None, content_location=None): + def make_multipart(self, content_disposition=None, content_type=None, + content_location=None): """ Makes this request field into a multipart request field. 
@@ -172,6 +168,10 @@ class RequestField(object): """ self.headers['Content-Disposition'] = content_disposition or 'form-data' - self.headers['Content-Disposition'] += '; '.join(['', self._render_parts((('name', self._name), ('filename', self._filename)))]) + self.headers['Content-Disposition'] += '; '.join([ + '', self._render_parts( + (('name', self._name), ('filename', self._filename)) + ) + ]) self.headers['Content-Type'] = content_type self.headers['Content-Location'] = content_location diff --git a/requests/packages/urllib3/filepost.py b/requests/packages/urllib3/filepost.py index e8b30bd..0fbf488 100644 --- a/requests/packages/urllib3/filepost.py +++ b/requests/packages/urllib3/filepost.py @@ -1,11 +1,4 @@ -# urllib3/filepost.py -# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) -# -# This module is part of urllib3 and is released under -# the MIT License: http://www.opensource.org/licenses/mit-license.php - import codecs -import mimetypes from uuid import uuid4 from io import BytesIO @@ -38,10 +31,10 @@ def iter_field_objects(fields): i = iter(fields) for field in i: - if isinstance(field, RequestField): - yield field - else: - yield RequestField.from_tuples(*field) + if isinstance(field, RequestField): + yield field + else: + yield RequestField.from_tuples(*field) def iter_fields(fields): diff --git a/requests/packages/urllib3/packages/ordered_dict.py b/requests/packages/urllib3/packages/ordered_dict.py index 7f8ee15..4479363 100644 --- a/requests/packages/urllib3/packages/ordered_dict.py +++ b/requests/packages/urllib3/packages/ordered_dict.py @@ -2,7 +2,6 @@ # Passes Python2.7's test suite and incorporates all the latest updates. # Copyright 2009 Raymond Hettinger, released under the MIT License. 
# http://code.activestate.com/recipes/576693/ - try: from thread import get_ident as _get_ident except ImportError: diff --git a/requests/packages/urllib3/poolmanager.py b/requests/packages/urllib3/poolmanager.py index f18ff2b..515dc96 100644 --- a/requests/packages/urllib3/poolmanager.py +++ b/requests/packages/urllib3/poolmanager.py @@ -1,9 +1,3 @@ -# urllib3/poolmanager.py -# Copyright 2008-2014 Andrey Petrov and contributors (see CONTRIBUTORS.txt) -# -# This module is part of urllib3 and is released under -# the MIT License: http://www.opensource.org/licenses/mit-license.php - import logging try: # Python 3 @@ -14,8 +8,10 @@ except ImportError: from ._collections import RecentlyUsedContainer from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool from .connectionpool import port_by_scheme +from .exceptions import LocationValueError from .request import RequestMethods -from .util import parse_url +from .util.url import parse_url +from .util.retry import Retry __all__ = ['PoolManager', 'ProxyManager', 'proxy_from_url'] @@ -49,7 +45,7 @@ class PoolManager(RequestMethods): Additional parameters are used to create fresh :class:`urllib3.connectionpool.ConnectionPool` instances. - Example: :: + Example:: >>> manager = PoolManager(num_pools=2) >>> r = manager.request('GET', 'http://google.com/') @@ -102,10 +98,11 @@ class PoolManager(RequestMethods): ``urllib3.connectionpool.port_by_scheme``. """ - scheme = scheme or 'http' + if not host: + raise LocationValueError("No host specified.") + scheme = scheme or 'http' port = port or port_by_scheme.get(scheme, 80) - pool_key = (scheme, host, port) with self.pools.lock: @@ -118,6 +115,7 @@ class PoolManager(RequestMethods): # Make a fresh ConnectionPool of the desired type pool = self._new_pool(scheme, host, port) self.pools[pool_key] = pool + return pool def connection_from_url(self, url): @@ -161,13 +159,18 @@ class PoolManager(RequestMethods): # Support relative URLs for redirecting. 
redirect_location = urljoin(url, redirect_location) - # RFC 2616, Section 10.3.4 + # RFC 7231, Section 6.4.4 if response.status == 303: method = 'GET' - log.info("Redirecting %s -> %s" % (url, redirect_location)) - kw['retries'] = kw.get('retries', 3) - 1 # Persist retries countdown + retries = kw.get('retries') + if not isinstance(retries, Retry): + retries = Retry.from_int(retries, redirect=redirect) + + kw['retries'] = retries.increment(method, redirect_location) kw['redirect'] = redirect + + log.info("Redirecting %s -> %s" % (url, redirect_location)) return self.urlopen(method, redirect_location, **kw) @@ -208,12 +211,16 @@ class ProxyManager(PoolManager): if not proxy.port: port = port_by_scheme.get(proxy.scheme, 80) proxy = proxy._replace(port=port) + + assert proxy.scheme in ("http", "https"), \ + 'Not supported proxy scheme %s' % proxy.scheme + self.proxy = proxy self.proxy_headers = proxy_headers or {} - assert self.proxy.scheme in ("http", "https"), \ - 'Not supported proxy scheme %s' % self.proxy.scheme + connection_pool_kw['_proxy'] = self.proxy connection_pool_kw['_proxy_headers'] = self.proxy_headers + super(ProxyManager, self).__init__( num_pools, headers, **connection_pool_kw) @@ -248,10 +255,10 @@ class ProxyManager(PoolManager): # For proxied HTTPS requests, httplib sets the necessary headers # on the CONNECT to the proxy. For HTTP, we'll definitely # need to set 'Host' at the very least. 
- kw['headers'] = self._set_proxy_headers(url, kw.get('headers', - self.headers)) + headers = kw.get('headers', self.headers) + kw['headers'] = self._set_proxy_headers(url, headers) - return super(ProxyManager, self).urlopen(method, url, redirect, **kw) + return super(ProxyManager, self).urlopen(method, url, redirect=redirect, **kw) def proxy_from_url(url, **kw): diff --git a/requests/packages/urllib3/request.py b/requests/packages/urllib3/request.py index 2a92cc2..51fe238 100644 --- a/requests/packages/urllib3/request.py +++ b/requests/packages/urllib3/request.py @@ -1,9 +1,3 @@ -# urllib3/request.py -# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) -# -# This module is part of urllib3 and is released under -# the MIT License: http://www.opensource.org/licenses/mit-license.php - try: from urllib.parse import urlencode except ImportError: @@ -26,8 +20,8 @@ class RequestMethods(object): Specifically, - :meth:`.request_encode_url` is for sending requests whose fields are encoded - in the URL (such as GET, HEAD, DELETE). + :meth:`.request_encode_url` is for sending requests whose fields are + encoded in the URL (such as GET, HEAD, DELETE). :meth:`.request_encode_body` is for sending requests whose fields are encoded in the *body* of the request using multipart or www-form-urlencoded @@ -51,7 +45,7 @@ class RequestMethods(object): def urlopen(self, method, url, body=None, headers=None, encode_multipart=True, multipart_boundary=None, - **kw): # Abstract + **kw): # Abstract raise NotImplemented("Classes extending RequestMethods must implement " "their own ``urlopen`` method.") @@ -61,8 +55,8 @@ class RequestMethods(object): ``fields`` based on the ``method`` used. This is a convenience method that requires the least amount of manual - effort. It can be used in most situations, while still having the option - to drop down to more specific methods when necessary, such as + effort. 
It can be used in most situations, while still having the + option to drop down to more specific methods when necessary, such as :meth:`request_encode_url`, :meth:`request_encode_body`, or even the lowest level :meth:`urlopen`. """ @@ -70,12 +64,12 @@ class RequestMethods(object): if method in self._encode_url_methods: return self.request_encode_url(method, url, fields=fields, - headers=headers, - **urlopen_kw) + headers=headers, + **urlopen_kw) else: return self.request_encode_body(method, url, fields=fields, - headers=headers, - **urlopen_kw) + headers=headers, + **urlopen_kw) def request_encode_url(self, method, url, fields=None, **urlopen_kw): """ @@ -94,18 +88,18 @@ class RequestMethods(object): the body. This is useful for request methods like POST, PUT, PATCH, etc. When ``encode_multipart=True`` (default), then - :meth:`urllib3.filepost.encode_multipart_formdata` is used to encode the - payload with the appropriate content type. Otherwise + :meth:`urllib3.filepost.encode_multipart_formdata` is used to encode + the payload with the appropriate content type. Otherwise :meth:`urllib.urlencode` is used with the 'application/x-www-form-urlencoded' content type. Multipart encoding must be used when posting files, and it's reasonably - safe to use it in other times too. However, it may break request signing, - such as with OAuth. + safe to use it in other times too. However, it may break request + signing, such as with OAuth. Supports an optional ``fields`` parameter of key/value strings AND key/filetuple. A filetuple is a (filename, data, MIME type) tuple where - the MIME type is optional. For example: :: + the MIME type is optional. For example:: fields = { 'foo': 'bar', @@ -119,17 +113,17 @@ class RequestMethods(object): When uploading a file, providing a filename (the first parameter of the tuple) is optional but recommended to best mimick behavior of browsers. 
- Note that if ``headers`` are supplied, the 'Content-Type' header will be - overwritten because it depends on the dynamic random boundary string + Note that if ``headers`` are supplied, the 'Content-Type' header will + be overwritten because it depends on the dynamic random boundary string which is used to compose the body of the request. The random boundary string can be explicitly set with the ``multipart_boundary`` parameter. """ if encode_multipart: - body, content_type = encode_multipart_formdata(fields or {}, - boundary=multipart_boundary) + body, content_type = encode_multipart_formdata( + fields or {}, boundary=multipart_boundary) else: body, content_type = (urlencode(fields or {}), - 'application/x-www-form-urlencoded') + 'application/x-www-form-urlencoded') if headers is None: headers = self.headers diff --git a/requests/packages/urllib3/response.py b/requests/packages/urllib3/response.py index db44182..e69de95 100644 --- a/requests/packages/urllib3/response.py +++ b/requests/packages/urllib3/response.py @@ -1,22 +1,14 @@ -# urllib3/response.py -# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) -# -# This module is part of urllib3 and is released under -# the MIT License: http://www.opensource.org/licenses/mit-license.php - - -import logging import zlib import io +from socket import timeout as SocketTimeout from ._collections import HTTPHeaderDict -from .exceptions import DecodeError +from .exceptions import ProtocolError, DecodeError, ReadTimeoutError from .packages.six import string_types as basestring, binary_type -from .util import is_fp_closed +from .connection import HTTPException, BaseSSLError +from .util.response import is_fp_closed -log = logging.getLogger(__name__) - class DeflateDecoder(object): @@ -56,7 +48,10 @@ class HTTPResponse(io.IOBase): HTTP Response container. Backwards-compatible to httplib's HTTPResponse but the response ``body`` is - loaded and decoded on-demand when the ``data`` property is accessed. 
+ loaded and decoded on-demand when the ``data`` property is accessed. This + class is also compatible with the Python standard library's :mod:`io` + module, and can hence be treated as a readable object in the context of that + framework. Extra parameters for behaviour not present in httplib.HTTPResponse: @@ -91,11 +86,14 @@ class HTTPResponse(io.IOBase): self.decode_content = decode_content self._decoder = None - self._body = body if body and isinstance(body, basestring) else None + self._body = None self._fp = None self._original_response = original_response self._fp_bytes_read = 0 + if body and isinstance(body, (basestring, binary_type)): + self._body = body + self._pool = pool self._connection = connection @@ -163,8 +161,8 @@ class HTTPResponse(io.IOBase): after having ``.read()`` the file object. (Overridden if ``amt`` is set.) """ - # Note: content-encoding value should be case-insensitive, per RFC 2616 - # Section 3.5 + # Note: content-encoding value should be case-insensitive, per RFC 7230 + # Section 3.2 content_encoding = self.headers.get('content-encoding', '').lower() if self._decoder is None: if content_encoding in self.CONTENT_DECODERS: @@ -178,23 +176,42 @@ class HTTPResponse(io.IOBase): flush_decoder = False try: - if amt is None: - # cStringIO doesn't like amt=None - data = self._fp.read() - flush_decoder = True - else: - cache_content = False - data = self._fp.read(amt) - if amt != 0 and not data: # Platform-specific: Buggy versions of Python. - # Close the connection when no data is returned - # - # This is redundant to what httplib/http.client _should_ - # already do. However, versions of python released before - # December 15, 2012 (http://bugs.python.org/issue16298) do not - # properly close the connection in all cases. There is no harm - # in redundantly calling close. 
- self._fp.close() + try: + if amt is None: + # cStringIO doesn't like amt=None + data = self._fp.read() flush_decoder = True + else: + cache_content = False + data = self._fp.read(amt) + if amt != 0 and not data: # Platform-specific: Buggy versions of Python. + # Close the connection when no data is returned + # + # This is redundant to what httplib/http.client _should_ + # already do. However, versions of python released before + # December 15, 2012 (http://bugs.python.org/issue16298) do + # not properly close the connection in all cases. There is + # no harm in redundantly calling close. + self._fp.close() + flush_decoder = True + + except SocketTimeout: + # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but + # there is yet no clean way to get at it from this context. + raise ReadTimeoutError(self._pool, None, 'Read timed out.') + + except BaseSSLError as e: + # FIXME: Is there a better way to differentiate between SSLErrors? + if not 'read operation timed out' in str(e): # Defensive: + # This shouldn't happen but just in case we're missing an edge + # case, let's avoid swallowing SSL errors. + raise + + raise ReadTimeoutError(self._pool, None, 'Read timed out.') + + except HTTPException as e: + # This includes IncompleteRead. + raise ProtocolError('Connection broken: %r' % e, e) self._fp_bytes_read += len(data) @@ -204,8 +221,7 @@ class HTTPResponse(io.IOBase): except (IOError, zlib.error) as e: raise DecodeError( "Received response with content-encoding: %s, but " - "failed to decode it." % content_encoding, - e) + "failed to decode it." 
% content_encoding, e) if flush_decoder and decode_content and self._decoder: buf = self._decoder.decompress(binary_type()) @@ -242,7 +258,6 @@ class HTTPResponse(io.IOBase): if data: yield data - @classmethod def from_httplib(ResponseCls, r, **response_kw): """ @@ -297,7 +312,7 @@ class HTTPResponse(io.IOBase): elif hasattr(self._fp, "fileno"): return self._fp.fileno() else: - raise IOError("The file-like object this HTTPResponse is wrapped " + raise IOError("The file-like object this HTTPResponse is wrapped " "around has no file descriptor") def flush(self): @@ -305,4 +320,14 @@ class HTTPResponse(io.IOBase): return self._fp.flush() def readable(self): + # This method is required for `io` module compatibility. return True + + def readinto(self, b): + # This method is required for `io` module compatibility. + temp = self.read(len(b)) + if len(temp) == 0: + return 0 + else: + b[:len(temp)] = temp + return len(temp) diff --git a/requests/packages/urllib3/util/__init__.py b/requests/packages/urllib3/util/__init__.py index a40185e..8becc81 100644 --- a/requests/packages/urllib3/util/__init__.py +++ b/requests/packages/urllib3/util/__init__.py @@ -1,9 +1,4 @@ -# urllib3/util/__init__.py -# Copyright 2008-2014 Andrey Petrov and contributors (see CONTRIBUTORS.txt) -# -# This module is part of urllib3 and is released under -# the MIT License: http://www.opensource.org/licenses/mit-license.php - +# For backwards compatibility, provide imports that used to be here. 
from .connection import is_connection_dropped from .request import make_headers from .response import is_fp_closed @@ -19,6 +14,8 @@ from .timeout import ( current_time, Timeout, ) + +from .retry import Retry from .url import ( get_host, parse_url, diff --git a/requests/packages/urllib3/util/connection.py b/requests/packages/urllib3/util/connection.py index 8deeab5..2156993 100644 --- a/requests/packages/urllib3/util/connection.py +++ b/requests/packages/urllib3/util/connection.py @@ -1,4 +1,4 @@ -from socket import error as SocketError +import socket try: from select import poll, POLLIN except ImportError: # `poll` doesn't exist on OSX and other platforms @@ -8,6 +8,7 @@ except ImportError: # `poll` doesn't exist on OSX and other platforms except ImportError: # `select` doesn't exist on AppEngine. select = False + def is_connection_dropped(conn): # Platform-specific """ Returns True if the connection is dropped and should be closed. @@ -22,7 +23,7 @@ def is_connection_dropped(conn): # Platform-specific if sock is False: # Platform-specific: AppEngine return False if sock is None: # Connection already closed (such as by httplib). - return False + return True if not poll: if not select: # Platform-specific: AppEngine @@ -30,7 +31,7 @@ def is_connection_dropped(conn): # Platform-specific try: return select([sock], [], [], 0.0)[0] - except SocketError: + except socket.error: return True # This version is better on platforms that support it. @@ -42,4 +43,55 @@ def is_connection_dropped(conn): # Platform-specific return True +# This function is copied from socket.py in the Python 2.7 standard +# library test suite. Added to its signature is only `socket_options`. +def create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, + source_address=None, socket_options=None): + """Connect to *address* and return the socket object. + + Convenience function. Connect to *address* (a 2-tuple ``(host, + port)``) and return the socket object. 
Passing the optional + *timeout* parameter will set the timeout on the socket instance + before attempting to connect. If no *timeout* is supplied, the + global default timeout setting returned by :func:`getdefaulttimeout` + is used. If *source_address* is set it must be a tuple of (host, port) + for the socket to bind as a source address before making the connection. + An host of '' or port 0 tells the OS to use the default. + """ + + host, port = address + err = None + for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM): + af, socktype, proto, canonname, sa = res + sock = None + try: + sock = socket.socket(af, socktype, proto) + + # If provided, set socket level options before connecting. + # This is the only addition urllib3 makes to this function. + _set_socket_options(sock, socket_options) + + if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT: + sock.settimeout(timeout) + if source_address: + sock.bind(source_address) + sock.connect(sa) + return sock + + except socket.error as _: + err = _ + if sock is not None: + sock.close() + + if err is not None: + raise err + else: + raise socket.error("getaddrinfo returns an empty list") + + +def _set_socket_options(sock, options): + if options is None: + return + for opt in options: + sock.setsockopt(*opt) diff --git a/requests/packages/urllib3/util/request.py b/requests/packages/urllib3/util/request.py index d48d651..bc64f6b 100644 --- a/requests/packages/urllib3/util/request.py +++ b/requests/packages/urllib3/util/request.py @@ -1,13 +1,12 @@ from base64 import b64encode -from ..packages import six - +from ..packages.six import b ACCEPT_ENCODING = 'gzip,deflate' def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, - basic_auth=None, proxy_basic_auth=None): + basic_auth=None, proxy_basic_auth=None, disable_cache=None): """ Shortcuts for generating request headers. 
@@ -32,7 +31,10 @@ def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, Colon-separated username:password string for 'proxy-authorization: basic ...' auth header. - Example: :: + :param disable_cache: + If ``True``, adds 'cache-control: no-cache' header. + + Example:: >>> make_headers(keep_alive=True, user_agent="Batman/1.0") {'connection': 'keep-alive', 'user-agent': 'Batman/1.0'} @@ -57,12 +59,13 @@ def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, if basic_auth: headers['authorization'] = 'Basic ' + \ - b64encode(six.b(basic_auth)).decode('utf-8') + b64encode(b(basic_auth)).decode('utf-8') if proxy_basic_auth: headers['proxy-authorization'] = 'Basic ' + \ - b64encode(six.b(proxy_basic_auth)).decode('utf-8') - - return headers + b64encode(b(proxy_basic_auth)).decode('utf-8') + if disable_cache: + headers['cache-control'] = 'no-cache' + return headers diff --git a/requests/packages/urllib3/util/response.py b/requests/packages/urllib3/util/response.py index d0325bc..45fff55 100644 --- a/requests/packages/urllib3/util/response.py +++ b/requests/packages/urllib3/util/response.py @@ -5,9 +5,18 @@ def is_fp_closed(obj): :param obj: The file-like object to check. """ - if hasattr(obj, 'fp'): - # Object is a container for another file-like object that gets released - # on exhaustion (e.g. HTTPResponse) + + try: + # Check via the official file-like-object way. + return obj.closed + except AttributeError: + pass + + try: + # Check if the object is a container for another file-like object that + # gets released on exhaustion (e.g. HTTPResponse). 
return obj.fp is None + except AttributeError: + pass - return obj.closed + raise ValueError("Unable to determine whether fp is closed.") diff --git a/requests/packages/urllib3/util/retry.py b/requests/packages/urllib3/util/retry.py new file mode 100644 index 0000000..eb560df --- /dev/null +++ b/requests/packages/urllib3/util/retry.py @@ -0,0 +1,279 @@ +import time +import logging + +from ..exceptions import ( + ProtocolError, + ConnectTimeoutError, + ReadTimeoutError, + MaxRetryError, +) +from ..packages import six + + +log = logging.getLogger(__name__) + + +class Retry(object): + """ Retry configuration. + + Each retry attempt will create a new Retry object with updated values, so + they can be safely reused. + + Retries can be defined as a default for a pool:: + + retries = Retry(connect=5, read=2, redirect=5) + http = PoolManager(retries=retries) + response = http.request('GET', 'http://example.com/') + + Or per-request (which overrides the default for the pool):: + + response = http.request('GET', 'http://example.com/', retries=Retry(10)) + + Retries can be disabled by passing ``False``:: + + response = http.request('GET', 'http://example.com/', retries=False) + + Errors will be wrapped in :class:`~urllib3.exceptions.MaxRetryError` unless + retries are disabled, in which case the causing exception will be raised. + + + :param int total: + Total number of retries to allow. Takes precedence over other counts. + + Set to ``None`` to remove this constraint and fall back on other + counts. It's a good idea to set this to some sensibly-high value to + account for unexpected edge cases and avoid infinite retry loops. + + Set to ``0`` to fail on the first retry. + + Set to ``False`` to disable and imply ``raise_on_redirect=False``. + + :param int connect: + How many connection-related errors to retry on. + + These are errors raised before the request is sent to the remote server, + which we assume has not triggered the server to process the request. 
+ + Set to ``0`` to fail on the first retry of this type. + + :param int read: + How many times to retry on read errors. + + These errors are raised after the request was sent to the server, so the + request may have side-effects. + + Set to ``0`` to fail on the first retry of this type. + + :param int redirect: + How many redirects to perform. Limit this to avoid infinite redirect + loops. + + A redirect is a HTTP response with a status code 301, 302, 303, 307 or + 308. + + Set to ``0`` to fail on the first retry of this type. + + Set to ``False`` to disable and imply ``raise_on_redirect=False``. + + :param iterable method_whitelist: + Set of uppercased HTTP method verbs that we should retry on. + + By default, we only retry on methods which are considered to be + idempotent (multiple requests with the same parameters end with the + same state). See :attr:`Retry.DEFAULT_METHOD_WHITELIST`. + + :param iterable status_forcelist: + A set of HTTP status codes that we should force a retry on. + + By default, this is disabled with ``None``. + + :param float backoff_factor: + A backoff factor to apply between attempts. urllib3 will sleep for:: + + {backoff factor} * (2 ^ ({number of total retries} - 1)) + + seconds. If the backoff_factor is 0.1, then :func:`.sleep` will sleep + for [0.1s, 0.2s, 0.4s, ...] between retries. It will never be longer + than :attr:`Retry.BACKOFF_MAX`. + + By default, backoff is disabled (set to 0). + + :param bool raise_on_redirect: Whether, if the number of redirects is + exhausted, to raise a MaxRetryError, or to return a response with a + response code in the 3xx range. + """ + + DEFAULT_METHOD_WHITELIST = frozenset([ + 'HEAD', 'GET', 'PUT', 'DELETE', 'OPTIONS', 'TRACE']) + + #: Maximum backoff time. 
+ BACKOFF_MAX = 120 + + def __init__(self, total=10, connect=None, read=None, redirect=None, + method_whitelist=DEFAULT_METHOD_WHITELIST, status_forcelist=None, + backoff_factor=0, raise_on_redirect=True, _observed_errors=0): + + self.total = total + self.connect = connect + self.read = read + + if redirect is False or total is False: + redirect = 0 + raise_on_redirect = False + + self.redirect = redirect + self.status_forcelist = status_forcelist or set() + self.method_whitelist = method_whitelist + self.backoff_factor = backoff_factor + self.raise_on_redirect = raise_on_redirect + self._observed_errors = _observed_errors # TODO: use .history instead? + + def new(self, **kw): + params = dict( + total=self.total, + connect=self.connect, read=self.read, redirect=self.redirect, + method_whitelist=self.method_whitelist, + status_forcelist=self.status_forcelist, + backoff_factor=self.backoff_factor, + raise_on_redirect=self.raise_on_redirect, + _observed_errors=self._observed_errors, + ) + params.update(kw) + return type(self)(**params) + + @classmethod + def from_int(cls, retries, redirect=True, default=None): + """ Backwards-compatibility for the old retries format.""" + if retries is None: + retries = default if default is not None else cls.DEFAULT + + if isinstance(retries, Retry): + return retries + + redirect = bool(redirect) and None + new_retries = cls(retries, redirect=redirect) + log.debug("Converted retries value: %r -> %r" % (retries, new_retries)) + return new_retries + + def get_backoff_time(self): + """ Formula for computing the current backoff + + :rtype: float + """ + if self._observed_errors <= 1: + return 0 + + backoff_value = self.backoff_factor * (2 ** (self._observed_errors - 1)) + return min(self.BACKOFF_MAX, backoff_value) + + def sleep(self): + """ Sleep between retry attempts using an exponential backoff. + + By default, the backoff factor is 0 and this method will return + immediately. 
+ """ + backoff = self.get_backoff_time() + if backoff <= 0: + return + time.sleep(backoff) + + def _is_connection_error(self, err): + """ Errors when we're fairly sure that the server did not receive the + request, so it should be safe to retry. + """ + return isinstance(err, ConnectTimeoutError) + + def _is_read_error(self, err): + """ Errors that occur after the request has been started, so we can't + assume that the server did not process any of it. + """ + return isinstance(err, (ReadTimeoutError, ProtocolError)) + + def is_forced_retry(self, method, status_code): + """ Is this method/response retryable? (Based on method/codes whitelists) + """ + if self.method_whitelist and method.upper() not in self.method_whitelist: + return False + + return self.status_forcelist and status_code in self.status_forcelist + + def is_exhausted(self): + """ Are we out of retries? + """ + retry_counts = (self.total, self.connect, self.read, self.redirect) + retry_counts = list(filter(None, retry_counts)) + if not retry_counts: + return False + + return min(retry_counts) < 0 + + def increment(self, method=None, url=None, response=None, error=None, _pool=None, _stacktrace=None): + """ Return a new Retry object with incremented retry counters. + + :param response: A response object, or None, if the server did not + return a response. + :type response: :class:`~urllib3.response.HTTPResponse` + :param Exception error: An error encountered during the request, or + None if the response was received successfully. + + :return: A new ``Retry`` object. + """ + if self.total is False and error: + # Disabled, indicate to re-raise the error. + raise six.reraise(type(error), error, _stacktrace) + + total = self.total + if total is not None: + total -= 1 + + _observed_errors = self._observed_errors + connect = self.connect + read = self.read + redirect = self.redirect + + if error and self._is_connection_error(error): + # Connect retry? 
+ if connect is False: + raise six.reraise(type(error), error, _stacktrace) + elif connect is not None: + connect -= 1 + _observed_errors += 1 + + elif error and self._is_read_error(error): + # Read retry? + if read is False: + raise six.reraise(type(error), error, _stacktrace) + elif read is not None: + read -= 1 + _observed_errors += 1 + + elif response and response.get_redirect_location(): + # Redirect retry? + if redirect is not None: + redirect -= 1 + + else: + # FIXME: Nothing changed, scenario doesn't make sense. + _observed_errors += 1 + + new_retry = self.new( + total=total, + connect=connect, read=read, redirect=redirect, + _observed_errors=_observed_errors) + + if new_retry.is_exhausted(): + raise MaxRetryError(_pool, url, error) + + log.debug("Incremented Retry for (url='%s'): %r" % (url, new_retry)) + + return new_retry + + + def __repr__(self): + return ('{cls.__name__}(total={self.total}, connect={self.connect}, ' + 'read={self.read}, redirect={self.redirect})').format( + cls=type(self), self=self) + + +# For backwards compatibility (equivalent to pre-v1.9): +Retry.DEFAULT = Retry(3) diff --git a/requests/packages/urllib3/util/ssl_.py b/requests/packages/urllib3/util/ssl_.py index dee4b87..9cfe2d2 100644 --- a/requests/packages/urllib3/util/ssl_.py +++ b/requests/packages/urllib3/util/ssl_.py @@ -34,10 +34,9 @@ def assert_fingerprint(cert, fingerprint): } fingerprint = fingerprint.replace(':', '').lower() + digest_length, odd = divmod(len(fingerprint), 2) - digest_length, rest = divmod(len(fingerprint), 2) - - if rest or digest_length not in hashfunc_map: + if odd or digest_length not in hashfunc_map: raise SSLError('Fingerprint is of invalid length.') # We need encode() here for py32; works on py2 and p33. 
diff --git a/requests/packages/urllib3/util/timeout.py b/requests/packages/urllib3/util/timeout.py index 4f947cb..ea7027f 100644 --- a/requests/packages/urllib3/util/timeout.py +++ b/requests/packages/urllib3/util/timeout.py @@ -1,32 +1,49 @@ +# The default socket timeout, used by httplib to indicate that no timeout was +# specified by the user from socket import _GLOBAL_DEFAULT_TIMEOUT import time from ..exceptions import TimeoutStateError +# A sentinel value to indicate that no timeout was specified by the user in +# urllib3 +_Default = object() def current_time(): """ - Retrieve the current time, this function is mocked out in unit testing. + Retrieve the current time. This function is mocked out in unit testing. """ return time.time() -_Default = object() -# The default timeout to use for socket connections. This is the attribute used -# by httplib to define the default timeout +class Timeout(object): + """ Timeout configuration. + Timeouts can be defined as a default for a pool:: -class Timeout(object): - """ - Utility object for storing timeout values. + timeout = Timeout(connect=2.0, read=7.0) + http = PoolManager(timeout=timeout) + response = http.request('GET', 'http://example.com/') + + Or per-request (which overrides the default for the pool):: + + response = http.request('GET', 'http://example.com/', timeout=Timeout(10)) + + Timeouts can be disabled by setting all the parameters to ``None``:: - Example usage: + no_timeout = Timeout(connect=None, read=None) + response = http.request('GET', 'http://example.com/', timeout=no_timeout) - .. code-block:: python - timeout = urllib3.util.Timeout(connect=2.0, read=7.0) - pool = HTTPConnectionPool('www.google.com', 80, timeout=timeout) - pool.request(...) # Etc, etc + :param total: + This combines the connect and read timeouts into one; the read timeout + will be set to the time leftover from the connect attempt. 
In the + event that both a connect timeout and a total are specified, or a read + timeout and a total are specified, the shorter timeout will be applied. + + Defaults to None. + + :type total: integer, float, or None :param connect: The maximum amount of time to wait for a connection attempt to a server @@ -47,25 +64,15 @@ class Timeout(object): :type read: integer, float, or None - :param total: - This combines the connect and read timeouts into one; the read timeout - will be set to the time leftover from the connect attempt. In the - event that both a connect timeout and a total are specified, or a read - timeout and a total are specified, the shorter timeout will be applied. - - Defaults to None. - - :type total: integer, float, or None - .. note:: Many factors can affect the total amount of time for urllib3 to return - an HTTP response. Specifically, Python's DNS resolver does not obey the - timeout specified on the socket. Other factors that can affect total - request time include high CPU load, high swap, the program running at a - low priority level, or other behaviors. The observed running time for - urllib3 to return a response may be greater than the value passed to - `total`. + an HTTP response. + + For example, Python's DNS resolver does not obey the timeout specified + on the socket. Other factors that can affect total request time include + high CPU load, high swap, the program running at a low priority level, + or other behaviors. In addition, the read and total timeouts only measure the time between read operations on the socket connecting the client and the server, @@ -73,8 +80,8 @@ class Timeout(object): response. For most requests, the timeout is raised because the server has not sent the first byte in the specified time. This is not always the case; if a server streams one byte every fifteen seconds, a timeout - of 20 seconds will not ever trigger, even though the request will - take several minutes to complete. 
+ of 20 seconds will not trigger, even though the request will take + several minutes to complete. If your goal is to cut off any request after a set amount of wall clock time, consider having a second "watcher" thread to cut off a slow @@ -94,17 +101,16 @@ class Timeout(object): return '%s(connect=%r, read=%r, total=%r)' % ( type(self).__name__, self._connect, self._read, self.total) - @classmethod def _validate_timeout(cls, value, name): - """ Check that a timeout attribute is valid + """ Check that a timeout attribute is valid. :param value: The timeout value to validate - :param name: The name of the timeout attribute to validate. This is used - for clear error messages - :return: the value - :raises ValueError: if the type is not an integer or a float, or if it - is a numeric value less than zero + :param name: The name of the timeout attribute to validate. This is + used to specify in error messages. + :return: The validated and casted version of the given value. + :raises ValueError: If the type is not an integer or a float, or if it + is a numeric value less than zero. """ if value is _Default: return cls.DEFAULT_TIMEOUT @@ -123,7 +129,7 @@ class Timeout(object): raise ValueError("Attempted to set %s timeout to %s, but the " "timeout cannot be set to a value less " "than 0." % (name, value)) - except TypeError: # Python 3 + except TypeError: # Python 3 raise ValueError("Timeout value %s was %s, but it must be an " "int or float." % (name, value)) @@ -135,12 +141,12 @@ class Timeout(object): The timeout value used by httplib.py sets the same timeout on the connect(), and recv() socket requests. This creates a :class:`Timeout` - object that sets the individual timeouts to the ``timeout`` value passed - to this function. + object that sets the individual timeouts to the ``timeout`` value + passed to this function. - :param timeout: The legacy timeout value + :param timeout: The legacy timeout value. 
:type timeout: integer, float, sentinel default object, or None - :return: a Timeout object + :return: Timeout object :rtype: :class:`Timeout` """ return Timeout(read=timeout, connect=timeout) @@ -174,7 +180,7 @@ class Timeout(object): def get_connect_duration(self): """ Gets the time elapsed since the call to :meth:`start_connect`. - :return: the elapsed time + :return: Elapsed time. :rtype: float :raises urllib3.exceptions.TimeoutStateError: if you attempt to get duration for a timer that hasn't been started. @@ -191,7 +197,7 @@ class Timeout(object): This will be a positive float or integer, the value None (never timeout), or the default system timeout. - :return: the connect timeout + :return: Connect timeout. :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None """ if self.total is None: @@ -214,7 +220,7 @@ class Timeout(object): established, a :exc:`~urllib3.exceptions.TimeoutStateError` will be raised. - :return: the value to use for the read timeout + :return: Value to use for the read timeout. :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None :raises urllib3.exceptions.TimeoutStateError: If :meth:`start_connect` has not yet been called on this object. @@ -223,7 +229,7 @@ class Timeout(object): self.total is not self.DEFAULT_TIMEOUT and self._read is not None and self._read is not self.DEFAULT_TIMEOUT): - # in case the connect timeout has not yet been established. + # In case the connect timeout has not yet been established. 
if self._start_connect is None: return self._read return max(0, min(self.total - self.get_connect_duration(), diff --git a/requests/packages/urllib3/util/url.py b/requests/packages/urllib3/util/url.py index 362d216..487d456 100644 --- a/requests/packages/urllib3/util/url.py +++ b/requests/packages/urllib3/util/url.py @@ -3,15 +3,20 @@ from collections import namedtuple from ..exceptions import LocationParseError -class Url(namedtuple('Url', ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment'])): +url_attrs = ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment'] + + +class Url(namedtuple('Url', url_attrs)): """ Datastructure for representing an HTTP URL. Used as a return value for :func:`parse_url`. """ slots = () - def __new__(cls, scheme=None, auth=None, host=None, port=None, path=None, query=None, fragment=None): - return super(Url, cls).__new__(cls, scheme, auth, host, port, path, query, fragment) + def __new__(cls, scheme=None, auth=None, host=None, port=None, path=None, + query=None, fragment=None): + return super(Url, cls).__new__(cls, scheme, auth, host, port, path, + query, fragment) @property def hostname(self): @@ -43,7 +48,7 @@ def split_first(s, delims): If not found, then the first part is the full input string. - Example: :: + Example:: >>> split_first('foo/bar?baz', '?/=') ('foo', 'bar?baz', '/') @@ -76,7 +81,7 @@ def parse_url(url): Partly backwards-compatible with :mod:`urlparse`. - Example: :: + Example:: >>> parse_url('http://google.com/mail/') Url(scheme='http', host='google.com', port=None, path='/', ...) @@ -91,6 +96,10 @@ def parse_url(url): # Additionally, this implementations does silly things to be optimal # on CPython. + if not url: + # Empty + return Url() + scheme = None auth = None host = None diff --git a/requests/sessions.py b/requests/sessions.py index df85a25..d701ff2 100644 --- a/requests/sessions.py +++ b/requests/sessions.py @@ -91,10 +91,17 @@ class SessionRedirectMixin(object): """Receives a Response. 
Returns a generator of Responses.""" i = 0 + hist = [] # keep track of history while resp.is_redirect: prepared_request = req.copy() + if i > 0: + # Update history and keep track of redirects. + hist.append(resp) + new_hist = list(hist) + resp.history = new_hist + try: resp.content # Consume socket so it can be released except (ChunkedEncodingError, ContentDecodingError, RuntimeError): @@ -118,7 +125,7 @@ class SessionRedirectMixin(object): parsed = urlparse(url) url = parsed.geturl() - # Facilitate non-RFC2616-compliant 'location' headers + # Facilitate relative 'location' headers, as allowed by RFC 7231. # (e.g. '/path/to/resource' instead of 'http://domain.tld/path/to/resource') # Compliant with RFC3986, we percent encode the url. if not urlparse(url).netloc: @@ -127,8 +134,11 @@ class SessionRedirectMixin(object): url = requote_uri(url) prepared_request.url = to_native_string(url) + # Cache the url, unless it redirects to itself. + if resp.is_permanent_redirect and req.url != prepared_request.url: + self.redirect_cache[req.url] = prepared_request.url - # http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.3.4 + # http://tools.ietf.org/html/rfc7231#section-6.4.4 if (resp.status_code == codes.see_other and method != 'HEAD'): method = 'GET' @@ -146,7 +156,7 @@ class SessionRedirectMixin(object): prepared_request.method = method # https://github.com/kennethreitz/requests/issues/1084 - if resp.status_code not in (codes.temporary, codes.resume): + if resp.status_code not in (codes.temporary_redirect, codes.permanent_redirect): if 'Content-Length' in prepared_request.headers: del prepared_request.headers['Content-Length'] @@ -261,9 +271,10 @@ class Session(SessionRedirectMixin): """ __attrs__ = [ - 'headers', 'cookies', 'auth', 'timeout', 'proxies', 'hooks', - 'params', 'verify', 'cert', 'prefetch', 'adapters', 'stream', - 'trust_env', 'max_redirects'] + 'headers', 'cookies', 'auth', 'proxies', 'hooks', 'params', 'verify', + 'cert', 'prefetch', 'adapters', 
'stream', 'trust_env', + 'max_redirects', 'redirect_cache' + ] def __init__(self): @@ -316,6 +327,8 @@ class Session(SessionRedirectMixin): self.mount('https://', HTTPAdapter()) self.mount('http://', HTTPAdapter()) + self.redirect_cache = {} + def __enter__(self): return self @@ -353,6 +366,7 @@ class Session(SessionRedirectMixin): url=request.url, files=request.files, data=request.data, + json=request.json, headers=merge_setting(request.headers, self.headers, dict_class=CaseInsensitiveDict), params=merge_setting(request.params, self.params), auth=merge_setting(auth, self.auth), @@ -374,7 +388,8 @@ class Session(SessionRedirectMixin): hooks=None, stream=None, verify=None, - cert=None): + cert=None, + json=None): """Constructs a :class:`Request <Request>`, prepares it and sends it. Returns :class:`Response <Response>` object. @@ -384,17 +399,22 @@ class Session(SessionRedirectMixin): string for the :class:`Request`. :param data: (optional) Dictionary or bytes to send in the body of the :class:`Request`. + :param json: (optional) json to send in the body of the + :class:`Request`. :param headers: (optional) Dictionary of HTTP Headers to send with the :class:`Request`. :param cookies: (optional) Dict or CookieJar object to send with the :class:`Request`. - :param files: (optional) Dictionary of 'filename': file-like-objects + :param files: (optional) Dictionary of ``'filename': file-like-objects`` for multipart encoding upload. :param auth: (optional) Auth tuple or callable to enable Basic/Digest/Custom HTTP Auth. - :param timeout: (optional) Float describing the timeout of the - request in seconds. - :param allow_redirects: (optional) Boolean. Set to True by default. + :param timeout: (optional) How long to wait for the server to send + data before giving up, as a float, or a (`connect timeout, read + timeout <user/advanced.html#timeouts>`_) tuple. + :type timeout: float or tuple + :param allow_redirects: (optional) Set to True by default. 
+ :type allow_redirects: bool :param proxies: (optional) Dictionary mapping protocol to the URL of the proxy. :param stream: (optional) whether to immediately download the response @@ -414,6 +434,7 @@ class Session(SessionRedirectMixin): headers = headers, files = files, data = data or {}, + json = json, params = params or {}, auth = auth, cookies = cookies, @@ -423,36 +444,16 @@ class Session(SessionRedirectMixin): proxies = proxies or {} - # Gather clues from the surrounding environment. - if self.trust_env: - # Set environment's proxies. - env_proxies = get_environ_proxies(url) or {} - for (k, v) in env_proxies.items(): - proxies.setdefault(k, v) - - # Look for configuration. - if not verify and verify is not False: - verify = os.environ.get('REQUESTS_CA_BUNDLE') - - # Curl compatibility. - if not verify and verify is not False: - verify = os.environ.get('CURL_CA_BUNDLE') - - # Merge all the kwargs. - proxies = merge_setting(proxies, self.proxies) - stream = merge_setting(stream, self.stream) - verify = merge_setting(verify, self.verify) - cert = merge_setting(cert, self.cert) + settings = self.merge_environment_settings( + prep.url, proxies, stream, verify, cert + ) # Send the request. send_kwargs = { - 'stream': stream, 'timeout': timeout, - 'verify': verify, - 'cert': cert, - 'proxies': proxies, 'allow_redirects': allow_redirects, } + send_kwargs.update(settings) resp = self.send(prep, **send_kwargs) return resp @@ -487,15 +488,16 @@ class Session(SessionRedirectMixin): kwargs.setdefault('allow_redirects', False) return self.request('HEAD', url, **kwargs) - def post(self, url, data=None, **kwargs): + def post(self, url, data=None, json=None, **kwargs): """Sends a POST request. Returns :class:`Response` object. :param url: URL for the new :class:`Request` object. :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`. + :param json: (optional) json to send in the body of the :class:`Request`. 
:param \*\*kwargs: Optional arguments that ``request`` takes. """ - return self.request('POST', url, data=data, **kwargs) + return self.request('POST', url, data=data, json=json, **kwargs) def put(self, url, data=None, **kwargs): """Sends a PUT request. Returns :class:`Response` object. @@ -540,6 +542,14 @@ class Session(SessionRedirectMixin): if not isinstance(request, PreparedRequest): raise ValueError('You can only send PreparedRequests.') + checked_urls = set() + while request.url in self.redirect_cache: + checked_urls.add(request.url) + new_url = self.redirect_cache.get(request.url) + if new_url in checked_urls: + break + request.url = new_url + # Set up variables needed for resolve_redirects and dispatching of hooks allow_redirects = kwargs.pop('allow_redirects', True) stream = kwargs.get('stream') @@ -597,6 +607,30 @@ class Session(SessionRedirectMixin): return r + def merge_environment_settings(self, url, proxies, stream, verify, cert): + """Check the environment and merge it with some settings.""" + # Gather clues from the surrounding environment. + if self.trust_env: + # Set environment's proxies. + env_proxies = get_environ_proxies(url) or {} + for (k, v) in env_proxies.items(): + proxies.setdefault(k, v) + + # Look for requests environment configuration and be compatible + # with cURL. + if verify is True or verify is None: + verify = (os.environ.get('REQUESTS_CA_BUNDLE') or + os.environ.get('CURL_CA_BUNDLE')) + + # Merge all the kwargs. 
+ proxies = merge_setting(proxies, self.proxies) + stream = merge_setting(stream, self.stream) + verify = merge_setting(verify, self.verify) + cert = merge_setting(cert, self.cert) + + return {'verify': verify, 'proxies': proxies, 'stream': stream, + 'cert': cert} + def get_adapter(self, url): """Returns the appropriate connnection adapter for the given URL.""" for (prefix, adapter) in self.adapters.items(): diff --git a/requests/status_codes.py b/requests/status_codes.py index ed7a866..e0887f2 100644 --- a/requests/status_codes.py +++ b/requests/status_codes.py @@ -30,7 +30,8 @@ _codes = { 305: ('use_proxy',), 306: ('switch_proxy',), 307: ('temporary_redirect', 'temporary_moved', 'temporary'), - 308: ('resume_incomplete', 'resume'), + 308: ('permanent_redirect', + 'resume_incomplete', 'resume',), # These 2 to be removed in 3.0 # Client Error. 400: ('bad_request', 'bad'), diff --git a/requests/structures.py b/requests/structures.py index 9fd7818..3e5f2fa 100644 --- a/requests/structures.py +++ b/requests/structures.py @@ -8,30 +8,7 @@ Data structures that power Requests. """ -import os import collections -from itertools import islice - - -class IteratorProxy(object): - """docstring for IteratorProxy""" - def __init__(self, i): - self.i = i - # self.i = chain.from_iterable(i) - - def __iter__(self): - return self.i - - def __len__(self): - if hasattr(self.i, '__len__'): - return len(self.i) - if hasattr(self.i, 'len'): - return self.i.len - if hasattr(self.i, 'fileno'): - return os.fstat(self.i.fileno()).st_size - - def read(self, n): - return "".join(islice(self.i, None, n)) class CaseInsensitiveDict(collections.MutableMapping): @@ -46,7 +23,7 @@ class CaseInsensitiveDict(collections.MutableMapping): case of the last key to be set, and ``iter(instance)``, ``keys()``, ``items()``, ``iterkeys()``, and ``iteritems()`` will contain case-sensitive keys. 
However, querying and contains - testing is case insensitive: + testing is case insensitive:: cid = CaseInsensitiveDict() cid['Accept'] = 'application/json' diff --git a/requests/utils.py b/requests/utils.py index 68e50cf..1868f86 100644 --- a/requests/utils.py +++ b/requests/utils.py @@ -351,10 +351,7 @@ def get_unicode_from_response(r): Tried: 1. charset from content-type - - 2. every encodings from ``<meta ... charset=XXX>`` - - 3. fall back and replace all unicode characters + 2. fall back and replace all unicode characters """ @@ -554,7 +551,8 @@ def default_headers(): return CaseInsensitiveDict({ 'User-Agent': default_user_agent(), 'Accept-Encoding': ', '.join(('gzip', 'deflate')), - 'Accept': '*/*' + 'Accept': '*/*', + 'Connection': 'keep-alive', }) @@ -671,3 +669,18 @@ def to_native_string(string, encoding='ascii'): out = string.decode(encoding) return out + + +def urldefragauth(url): + """ + Given a url remove the fragment and the authentication part + """ + scheme, netloc, path, params, query, fragment = urlparse(url) + + # see func:`prepend_scheme_if_needed` + if not netloc: + netloc, path = path, netloc + + netloc = netloc.rsplit('@', 1)[-1] + + return urlunparse((scheme, netloc, path, params, query, '')) |