From 224200a9815f792f93632d03a38e4f0763ae69ef Mon Sep 17 00:00:00 2001 From: SVN-Git Migration Date: Thu, 8 Oct 2015 13:41:29 -0700 Subject: Imported Upstream version 2.0.0 --- HISTORY.rst | 49 ++++ PKG-INFO | 62 ++++- README.rst | 11 +- requests.egg-info/PKG-INFO | 62 ++++- requests.egg-info/SOURCES.txt | 1 + requests/__init__.py | 6 +- requests/adapters.py | 63 +++-- requests/auth.py | 15 +- requests/compat.py | 6 +- requests/cookies.py | 27 +- requests/exceptions.py | 6 +- requests/models.py | 118 ++++++--- requests/packages/urllib3/__init__.py | 2 +- requests/packages/urllib3/_collections.py | 16 +- requests/packages/urllib3/connectionpool.py | 288 ++++++++++++++++----- requests/packages/urllib3/contrib/ntlmpool.py | 2 +- requests/packages/urllib3/contrib/pyopenssl.py | 193 +++++++++++++- requests/packages/urllib3/exceptions.py | 30 ++- requests/packages/urllib3/fields.py | 177 +++++++++++++ requests/packages/urllib3/filepost.py | 59 +++-- .../packages/ssl_match_hostname/__init__.py | 67 +++-- requests/packages/urllib3/poolmanager.py | 121 +++++++-- requests/packages/urllib3/request.py | 2 +- requests/packages/urllib3/response.py | 74 +++++- requests/packages/urllib3/util.py | 260 ++++++++++++++++++- requests/sessions.py | 122 +++++---- requests/status_codes.py | 3 +- requests/structures.py | 2 +- requests/utils.py | 77 ++++-- test_requests.py | 179 ++++++++++++- 30 files changed, 1758 insertions(+), 342 deletions(-) create mode 100644 requests/packages/urllib3/fields.py diff --git a/HISTORY.rst b/HISTORY.rst index 693f017..0a0f647 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -3,6 +3,55 @@ History ------- +2.0.0 (2013-09-24) +++++++++++++++++++ + +**API Changes:** + +- Keys in the Headers dictionary are now native strings on all Python versions, + i.e. bytestrings on Python 2, unicode on Python 3. +- Proxy URLs now *must* have an explicit scheme. A ``MissingSchema`` exception + will be raised if they don't. +- Timeouts now apply to read time if ``Stream=False``. +- ``RequestException`` is now a subclass of ``IOError``, not ``RuntimeError``. +- Added new method to ``PreparedRequest`` objects: ``PreparedRequest.copy()``. +- Added new method to ``Session`` objects: ``Session.update_request()``. This + method updates a ``Request`` object with the data (e.g. cookies) stored on + the ``Session``. +- Added new method to ``Session`` objects: ``Session.prepare_request()``. This + method updates and prepares a ``Request`` object, and returns the + corresponding ``PreparedRequest`` object. +- Added new method to ``HTTPAdapter`` objects: ``HTTPAdapter.proxy_headers()``. + This should not be called directly, but improves the subclass interface. +- ``httplib.IncompleteRead`` exceptions caused by incorrect chunked encoding + will now raise a Requests ``ChunkedEncodingError`` instead. +- Invalid percent-escape sequences now cause a Requests ``InvalidURL`` + exception to be raised. +- HTTP 208 no longer uses reason phrase ``"im_used"``. Correctly uses + ``"already_reported"``. +- HTTP 226 reason added (``"im_used"``). + +**Bugfixes:** + +- Vastly improved proxy support, including the CONNECT verb. Special thanks to + the many contributors who worked towards this improvement. +- Cookies are now properly managed when 401 authentication responses are + received. +- Chunked encoding fixes. +- Support for mixed case schemes. +- Better handling of streaming downloads. +- Retrieve environment proxies from more locations. +- Minor cookies fixes. +- Imroved redirect behaviour. +- Improved streaming behaviour, particularly for compressed data. +- Miscellaneous small Python 3 text encoding bugs. +- ``.netrc`` no longer overrides explicit auth. +- Cookies set by hooks are now correctly persisted on Sessions. +- Fix problem with cookies that specify port numbers in their host field. +- ``BytesIO`` can be used to perform streaming uploads. +- More generous parsing of the ``no_proxy`` environment variable. +- Non-string objects can be passed in data values alongside files. + 1.2.3 (2013-05-25) ++++++++++++++++++ diff --git a/PKG-INFO b/PKG-INFO index c221b03..68a05bb 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 1.1 Name: requests -Version: 1.2.3 +Version: 2.0.0 Summary: Python HTTP for Humans. Home-page: http://python-requests.org Author: Kenneth Reitz @@ -22,6 +22,8 @@ License: Copyright 2013 Kenneth Reitz Description: Requests: HTTP for Humans ========================= + .. image:: https://badge.fury.io/py/requests.png + :target: http://badge.fury.io/py/requests .. image:: https://travis-ci.org/kennethreitz/requests.png?branch=master :target: https://travis-ci.org/kennethreitz/requests @@ -73,12 +75,13 @@ Description: Requests: HTTP for Humans - Multipart File Uploads - Connection Timeouts - Thread-safety + - HTTP(S) proxy support Installation ------------ - To install requests, simply: + To install Requests, simply: .. code-block:: bash @@ -102,13 +105,15 @@ Description: Requests: HTTP for Humans Contribute ---------- - #. Check for open issues or open a fresh issue to start a discussion around a feature idea or a bug. There is a Contributor Friendly tag for issues that should be ideal for people who are not very familiar with the codebase yet. - #. Fork `the repository`_ on Github to start making your changes to the **master** branch (or branch off of it). + #. Check for open issues or open a fresh issue to start a discussion around a feature idea or a bug. There is a `Contributor Friendly`_ tag for issues that should be ideal for people who are not very familiar with the codebase yet. + #. If you feel uncomfortable or uncertain about an issue or your changes, feel free to email @sigmavirus24 and he will happily help you via email, Skype, remote pairing or whatever you are comfortable with. + #. Fork `the repository`_ on GitHub to start making your changes to the **master** branch (or branch off of it). #. Write a test which shows that the bug was fixed or that the feature works as expected. #. Send a pull request and bug the maintainer until it gets merged and published. :) Make sure to add yourself to AUTHORS_. .. _`the repository`: http://github.com/kennethreitz/requests .. _AUTHORS: https://github.com/kennethreitz/requests/blob/master/AUTHORS.rst + .. _Contributor Friendly: https://github.com/kennethreitz/requests/issues?direction=desc&labels=Contributor+Friendly&page=1&sort=updated&state=open .. :changelog: @@ -116,6 +121,55 @@ Description: Requests: HTTP for Humans History ------- + 2.0.0 (2013-09-24) + ++++++++++++++++++ + + **API Changes:** + + - Keys in the Headers dictionary are now native strings on all Python versions, + i.e. bytestrings on Python 2, unicode on Python 3. + - Proxy URLs now *must* have an explicit scheme. A ``MissingSchema`` exception + will be raised if they don't. + - Timeouts now apply to read time if ``Stream=False``. + - ``RequestException`` is now a subclass of ``IOError``, not ``RuntimeError``. + - Added new method to ``PreparedRequest`` objects: ``PreparedRequest.copy()``. + - Added new method to ``Session`` objects: ``Session.update_request()``. This + method updates a ``Request`` object with the data (e.g. cookies) stored on + the ``Session``. + - Added new method to ``Session`` objects: ``Session.prepare_request()``. This + method updates and prepares a ``Request`` object, and returns the + corresponding ``PreparedRequest`` object. + - Added new method to ``HTTPAdapter`` objects: ``HTTPAdapter.proxy_headers()``. + This should not be called directly, but improves the subclass interface. + - ``httplib.IncompleteRead`` exceptions caused by incorrect chunked encoding + will now raise a Requests ``ChunkedEncodingError`` instead. + - Invalid percent-escape sequences now cause a Requests ``InvalidURL`` + exception to be raised. + - HTTP 208 no longer uses reason phrase ``"im_used"``. Correctly uses + ``"already_reported"``. + - HTTP 226 reason added (``"im_used"``). + + **Bugfixes:** + + - Vastly improved proxy support, including the CONNECT verb. Special thanks to + the many contributors who worked towards this improvement. + - Cookies are now properly managed when 401 authentication responses are + received. + - Chunked encoding fixes. + - Support for mixed case schemes. + - Better handling of streaming downloads. + - Retrieve environment proxies from more locations. + - Minor cookies fixes. + - Imroved redirect behaviour. + - Improved streaming behaviour, particularly for compressed data. + - Miscellaneous small Python 3 text encoding bugs. + - ``.netrc`` no longer overrides explicit auth. + - Cookies set by hooks are now correctly persisted on Sessions. + - Fix problem with cookies that specify port numbers in their host field. + - ``BytesIO`` can be used to perform streaming uploads. + - More generous parsing of the ``no_proxy`` environment variable. + - Non-string objects can be passed in data values alongside files. + 1.2.3 (2013-05-25) ++++++++++++++++++ diff --git a/README.rst b/README.rst index 3d03641..597bd4d 100644 --- a/README.rst +++ b/README.rst @@ -1,6 +1,8 @@ Requests: HTTP for Humans ========================= +.. image:: https://badge.fury.io/py/requests.png + :target: http://badge.fury.io/py/requests .. image:: https://travis-ci.org/kennethreitz/requests.png?branch=master :target: https://travis-ci.org/kennethreitz/requests @@ -52,12 +54,13 @@ Features - Multipart File Uploads - Connection Timeouts - Thread-safety +- HTTP(S) proxy support Installation ------------ -To install requests, simply: +To install Requests, simply: .. code-block:: bash @@ -81,10 +84,12 @@ Documentation is available at http://docs.python-requests.org/. Contribute ---------- -#. Check for open issues or open a fresh issue to start a discussion around a feature idea or a bug. There is a Contributor Friendly tag for issues that should be ideal for people who are not very familiar with the codebase yet. -#. Fork `the repository`_ on Github to start making your changes to the **master** branch (or branch off of it). +#. Check for open issues or open a fresh issue to start a discussion around a feature idea or a bug. There is a `Contributor Friendly`_ tag for issues that should be ideal for people who are not very familiar with the codebase yet. + #. If you feel uncomfortable or uncertain about an issue or your changes, feel free to email @sigmavirus24 and he will happily help you via email, Skype, remote pairing or whatever you are comfortable with. +#. Fork `the repository`_ on GitHub to start making your changes to the **master** branch (or branch off of it). #. Write a test which shows that the bug was fixed or that the feature works as expected. #. Send a pull request and bug the maintainer until it gets merged and published. :) Make sure to add yourself to AUTHORS_. .. _`the repository`: http://github.com/kennethreitz/requests .. _AUTHORS: https://github.com/kennethreitz/requests/blob/master/AUTHORS.rst +.. _Contributor Friendly: https://github.com/kennethreitz/requests/issues?direction=desc&labels=Contributor+Friendly&page=1&sort=updated&state=open diff --git a/requests.egg-info/PKG-INFO b/requests.egg-info/PKG-INFO index c221b03..68a05bb 100644 --- a/requests.egg-info/PKG-INFO +++ b/requests.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 1.1 Name: requests -Version: 1.2.3 +Version: 2.0.0 Summary: Python HTTP for Humans. Home-page: http://python-requests.org Author: Kenneth Reitz @@ -22,6 +22,8 @@ License: Copyright 2013 Kenneth Reitz Description: Requests: HTTP for Humans ========================= + .. image:: https://badge.fury.io/py/requests.png + :target: http://badge.fury.io/py/requests .. image:: https://travis-ci.org/kennethreitz/requests.png?branch=master :target: https://travis-ci.org/kennethreitz/requests @@ -73,12 +75,13 @@ Description: Requests: HTTP for Humans - Multipart File Uploads - Connection Timeouts - Thread-safety + - HTTP(S) proxy support Installation ------------ - To install requests, simply: + To install Requests, simply: .. code-block:: bash @@ -102,13 +105,15 @@ Description: Requests: HTTP for Humans Contribute ---------- - #. Check for open issues or open a fresh issue to start a discussion around a feature idea or a bug. There is a Contributor Friendly tag for issues that should be ideal for people who are not very familiar with the codebase yet. - #. Fork `the repository`_ on Github to start making your changes to the **master** branch (or branch off of it). + #. Check for open issues or open a fresh issue to start a discussion around a feature idea or a bug. There is a `Contributor Friendly`_ tag for issues that should be ideal for people who are not very familiar with the codebase yet. + #. If you feel uncomfortable or uncertain about an issue or your changes, feel free to email @sigmavirus24 and he will happily help you via email, Skype, remote pairing or whatever you are comfortable with. + #. Fork `the repository`_ on GitHub to start making your changes to the **master** branch (or branch off of it). #. Write a test which shows that the bug was fixed or that the feature works as expected. #. Send a pull request and bug the maintainer until it gets merged and published. :) Make sure to add yourself to AUTHORS_. .. _`the repository`: http://github.com/kennethreitz/requests .. _AUTHORS: https://github.com/kennethreitz/requests/blob/master/AUTHORS.rst + .. _Contributor Friendly: https://github.com/kennethreitz/requests/issues?direction=desc&labels=Contributor+Friendly&page=1&sort=updated&state=open .. :changelog: @@ -116,6 +121,55 @@ Description: Requests: HTTP for Humans History ------- + 2.0.0 (2013-09-24) + ++++++++++++++++++ + + **API Changes:** + + - Keys in the Headers dictionary are now native strings on all Python versions, + i.e. bytestrings on Python 2, unicode on Python 3. + - Proxy URLs now *must* have an explicit scheme. A ``MissingSchema`` exception + will be raised if they don't. + - Timeouts now apply to read time if ``Stream=False``. + - ``RequestException`` is now a subclass of ``IOError``, not ``RuntimeError``. + - Added new method to ``PreparedRequest`` objects: ``PreparedRequest.copy()``. + - Added new method to ``Session`` objects: ``Session.update_request()``. This + method updates a ``Request`` object with the data (e.g. cookies) stored on + the ``Session``. + - Added new method to ``Session`` objects: ``Session.prepare_request()``. This + method updates and prepares a ``Request`` object, and returns the + corresponding ``PreparedRequest`` object. + - Added new method to ``HTTPAdapter`` objects: ``HTTPAdapter.proxy_headers()``. + This should not be called directly, but improves the subclass interface. + - ``httplib.IncompleteRead`` exceptions caused by incorrect chunked encoding + will now raise a Requests ``ChunkedEncodingError`` instead. + - Invalid percent-escape sequences now cause a Requests ``InvalidURL`` + exception to be raised. + - HTTP 208 no longer uses reason phrase ``"im_used"``. Correctly uses + ``"already_reported"``. + - HTTP 226 reason added (``"im_used"``). + + **Bugfixes:** + + - Vastly improved proxy support, including the CONNECT verb. Special thanks to + the many contributors who worked towards this improvement. + - Cookies are now properly managed when 401 authentication responses are + received. + - Chunked encoding fixes. + - Support for mixed case schemes. + - Better handling of streaming downloads. + - Retrieve environment proxies from more locations. + - Minor cookies fixes. + - Imroved redirect behaviour. + - Improved streaming behaviour, particularly for compressed data. + - Miscellaneous small Python 3 text encoding bugs. + - ``.netrc`` no longer overrides explicit auth. + - Cookies set by hooks are now correctly persisted on Sessions. + - Fix problem with cookies that specify port numbers in their host field. + - ``BytesIO`` can be used to perform streaming uploads. + - More generous parsing of the ``no_proxy`` environment variable. + - Non-string objects can be passed in data values alongside files. + 1.2.3 (2013-05-25) ++++++++++++++++++ diff --git a/requests.egg-info/SOURCES.txt b/requests.egg-info/SOURCES.txt index 524e6fd..c8d2582 100644 --- a/requests.egg-info/SOURCES.txt +++ b/requests.egg-info/SOURCES.txt @@ -68,6 +68,7 @@ requests/packages/urllib3/__init__.py requests/packages/urllib3/_collections.py requests/packages/urllib3/connectionpool.py requests/packages/urllib3/exceptions.py +requests/packages/urllib3/fields.py requests/packages/urllib3/filepost.py requests/packages/urllib3/poolmanager.py requests/packages/urllib3/request.py diff --git a/requests/__init__.py b/requests/__init__.py index 1af8d8e..837f0df 100644 --- a/requests/__init__.py +++ b/requests/__init__.py @@ -42,15 +42,15 @@ is at . """ __title__ = 'requests' -__version__ = '1.2.3' -__build__ = 0x010203 +__version__ = '2.0.0' +__build__ = 0x020000 __author__ = 'Kenneth Reitz' __license__ = 'Apache 2.0' __copyright__ = 'Copyright 2013 Kenneth Reitz' # Attempt to enable urllib3's SNI support, if possible try: - from requests.packages.urllib3.contrib import pyopenssl + from .packages.urllib3.contrib import pyopenssl pyopenssl.inject_into_urllib3() except ImportError: pass diff --git a/requests/adapters.py b/requests/adapters.py index 98b7317..d557b74 100644 --- a/requests/adapters.py +++ b/requests/adapters.py @@ -11,11 +11,12 @@ and maintain connections. import socket from .models import Response -from .packages.urllib3.poolmanager import PoolManager, ProxyManager +from .packages.urllib3.poolmanager import PoolManager, proxy_from_url from .packages.urllib3.response import HTTPResponse +from .packages.urllib3.util import Timeout as TimeoutSauce from .compat import urlparse, basestring, urldefrag, unquote from .utils import (DEFAULT_CA_BUNDLE_PATH, get_encoding_from_headers, - prepend_scheme_if_needed, get_auth_from_url) + except_on_missing_scheme, get_auth_from_url) from .structures import CaseInsensitiveDict from .packages.urllib3.exceptions import MaxRetryError from .packages.urllib3.exceptions import TimeoutError @@ -71,6 +72,7 @@ class HTTPAdapter(BaseAdapter): pool_block=DEFAULT_POOLBLOCK): self.max_retries = max_retries self.config = {} + self.proxy_manager = {} super(HTTPAdapter, self).__init__() @@ -118,7 +120,7 @@ class HTTPAdapter(BaseAdapter): :param verify: Whether we should actually verify the certificate. :param cert: The SSL certificate to verify. """ - if url.startswith('https') and verify: + if url.lower().startswith('https') and verify: cert_loc = None @@ -184,19 +186,26 @@ class HTTPAdapter(BaseAdapter): def get_connection(self, url, proxies=None): """Returns a urllib3 connection for the given URL. This should not be called from user code, and is only exposed for use when subclassing the - :class:`HTTPAdapter `. + :class:`HTTPAdapter `. :param url: The URL to connect to. :param proxies: (optional) A Requests-style dictionary of proxies used on this request. """ proxies = proxies or {} - proxy = proxies.get(urlparse(url).scheme) + proxy = proxies.get(urlparse(url.lower()).scheme) if proxy: - proxy = prepend_scheme_if_needed(proxy, urlparse(url).scheme) - conn = ProxyManager(self.poolmanager.connection_from_url(proxy)) + except_on_missing_scheme(proxy) + proxy_headers = self.proxy_headers(proxy) + + if not proxy in self.proxy_manager: + self.proxy_manager[proxy] = proxy_from_url( + proxy, + proxy_headers=proxy_headers) + + conn = self.proxy_manager[proxy].connection_from_url(url) else: - conn = self.poolmanager.connection_from_url(url) + conn = self.poolmanager.connection_from_url(url.lower()) return conn @@ -214,7 +223,7 @@ class HTTPAdapter(BaseAdapter): If the message is being sent through a proxy, the full URL has to be used. Otherwise, we should only use the path portion of the URL. - This shoudl not be called from user code, and is only exposed for use + This should not be called from user code, and is only exposed for use when subclassing the :class:`HTTPAdapter `. @@ -232,8 +241,9 @@ class HTTPAdapter(BaseAdapter): return url def add_headers(self, request, **kwargs): - """Add any headers needed by the connection. Currently this adds a - Proxy-Authorization header. + """Add any headers needed by the connection. As of v2.0 this does + nothing by default, but is left for overriding by users that subclass + the :class:`HTTPAdapter `. This should not be called from user code, and is only exposed for use when subclassing the @@ -242,12 +252,22 @@ class HTTPAdapter(BaseAdapter): :param request: The :class:`PreparedRequest ` to add headers to. :param kwargs: The keyword arguments from the call to send(). """ - proxies = kwargs.get('proxies', {}) + pass - if proxies is None: - proxies = {} + def proxy_headers(self, proxy): + """Returns a dictionary of the headers to add to any request sent + through a proxy. This works with urllib3 magic to ensure that they are + correctly sent to the proxy, rather than in a tunnelled request if + CONNECT is being used. - proxy = proxies.get(urlparse(request.url).scheme) + This should not be called from user code, and is only exposed for use + when subclassing the + :class:`HTTPAdapter `. + + :param proxies: The url of the proxy being used for this request. + :param kwargs: Optional additional keyword arguments. + """ + headers = {} username, password = get_auth_from_url(proxy) if username and password: @@ -255,8 +275,10 @@ class HTTPAdapter(BaseAdapter): # to decode them. username = unquote(username) password = unquote(password) - request.headers['Proxy-Authorization'] = _basic_auth_str(username, - password) + headers['Proxy-Authorization'] = _basic_auth_str(username, + password) + + return headers def send(self, request, stream=False, timeout=None, verify=True, cert=None, proxies=None): """Sends PreparedRequest object. Returns Response object. @@ -273,10 +295,15 @@ class HTTPAdapter(BaseAdapter): self.cert_verify(conn, request.url, verify, cert) url = self.request_url(request, proxies) - self.add_headers(request, proxies=proxies) + self.add_headers(request) chunked = not (request.body is None or 'Content-Length' in request.headers) + if stream: + timeout = TimeoutSauce(connect=timeout) + else: + timeout = TimeoutSauce(connect=timeout, read=timeout) + try: if not chunked: resp = conn.urlopen( diff --git a/requests/auth.py b/requests/auth.py index fab05cf..30529e2 100644 --- a/requests/auth.py +++ b/requests/auth.py @@ -18,7 +18,6 @@ from base64 import b64encode from .compat import urlparse, str from .utils import parse_dict_header - log = logging.getLogger(__name__) CONTENT_TYPE_FORM_URLENCODED = 'application/x-www-form-urlencoded' @@ -106,7 +105,9 @@ class HTTPDigestAuth(AuthBase): A1 = '%s:%s:%s' % (self.username, realm, self.password) A2 = '%s:%s' % (method, path) - if qop == 'auth': + if qop is None: + respdig = KD(hash_utf8(A1), "%s:%s" % (nonce, hash_utf8(A2))) + elif qop == 'auth' or 'auth' in qop.split(','): if nonce == self.last_nonce: self.nonce_count += 1 else: @@ -121,8 +122,6 @@ class HTTPDigestAuth(AuthBase): cnonce = (hashlib.sha1(s).hexdigest()[:16]) noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, qop, hash_utf8(A2)) respdig = KD(hash_utf8(A1), noncebit) - elif qop is None: - respdig = KD(hash_utf8(A1), "%s:%s" % (nonce, hash_utf8(A2))) else: # XXX handle auth-int. return None @@ -159,10 +158,14 @@ class HTTPDigestAuth(AuthBase): # to allow our new request to reuse the same one. r.content r.raw.release_conn() + prep = r.request.copy() + prep.prepare_cookies(r.cookies) - r.request.headers['Authorization'] = self.build_digest_header(r.request.method, r.request.url) - _r = r.connection.send(r.request, **kwargs) + prep.headers['Authorization'] = self.build_digest_header( + prep.method, prep.url) + _r = r.connection.send(prep, **kwargs) _r.history.append(r) + _r.request = prep return _r diff --git a/requests/compat.py b/requests/compat.py index bcf94b0..0d61a57 100644 --- a/requests/compat.py +++ b/requests/compat.py @@ -83,13 +83,14 @@ except ImportError: # --------- if is_py2: - from urllib import quote, unquote, quote_plus, unquote_plus, urlencode + from urllib import quote, unquote, quote_plus, unquote_plus, urlencode, getproxies, proxy_bypass from urlparse import urlparse, urlunparse, urljoin, urlsplit, urldefrag from urllib2 import parse_http_list import cookielib from Cookie import Morsel from StringIO import StringIO from .packages.urllib3.packages.ordered_dict import OrderedDict + from httplib import IncompleteRead builtin_str = str bytes = str @@ -100,11 +101,12 @@ if is_py2: elif is_py3: from urllib.parse import urlparse, urlunparse, urljoin, urlsplit, urlencode, quote, unquote, quote_plus, unquote_plus, urldefrag - from urllib.request import parse_http_list + from urllib.request import parse_http_list, getproxies, proxy_bypass from http import cookiejar as cookielib from http.cookies import Morsel from io import StringIO from collections import OrderedDict + from http.client import IncompleteRead builtin_str = str str = str diff --git a/requests/cookies.py b/requests/cookies.py index d759d0a..f3ac64f 100644 --- a/requests/cookies.py +++ b/requests/cookies.py @@ -6,6 +6,7 @@ Compatibility code to be able to use `cookielib.CookieJar` with requests. requests.utils imports from here, so be careful with imports. """ +import time import collections from .compat import cookielib, urlparse, Morsel @@ -73,6 +74,10 @@ class MockRequest(object): def origin_req_host(self): return self.get_origin_req_host() + @property + def host(self): + return self.get_host() + class MockResponse(object): """Wraps a `httplib.HTTPMessage` to mimic a `urllib.addinfourl`. @@ -102,6 +107,9 @@ def extract_cookies_to_jar(jar, request, response): :param request: our own requests.Request object :param response: urllib3.HTTPResponse object """ + if not (hasattr(response, '_original_response') and + response._original_response): + return # the _original_response field is the wrapped httplib.HTTPResponse object, req = MockRequest(request) # pull out the HTTPMessage with the headers and put it in the mock: @@ -258,6 +266,11 @@ class RequestsCookieJar(cookielib.CookieJar, collections.MutableMapping): """Deletes a cookie given a name. Wraps cookielib.CookieJar's remove_cookie_by_name().""" remove_cookie_by_name(self, name) + def set_cookie(self, cookie, *args, **kwargs): + if cookie.value.startswith('"') and cookie.value.endswith('"'): + cookie.value = cookie.value.replace('\\"', '') + return super(RequestsCookieJar, self).set_cookie(cookie, *args, **kwargs) + def update(self, other): """Updates this jar with cookies from another CookieJar or dict-like""" if isinstance(other, cookielib.CookieJar): @@ -354,19 +367,23 @@ def create_cookie(name, value, **kwargs): def morsel_to_cookie(morsel): """Convert a Morsel object into a Cookie containing the one k/v pair.""" + expires = None + if morsel["max-age"]: + expires = time.time() + morsel["max-age"] + elif morsel['expires']: + expires = morsel['expires'] + if type(expires) == type(""): + time_template = "%a, %d-%b-%Y %H:%M:%S GMT" + expires = time.mktime(time.strptime(expires, time_template)) c = create_cookie( name=morsel.key, value=morsel.value, version=morsel['version'] or 0, port=None, - port_specified=False, domain=morsel['domain'], - domain_specified=bool(morsel['domain']), - domain_initial_dot=morsel['domain'].startswith('.'), path=morsel['path'], - path_specified=bool(morsel['path']), secure=bool(morsel['secure']), - expires=morsel['max-age'] or morsel['expires'], + expires=expires, discard=False, comment=morsel['comment'], comment_url=bool(morsel['comment']), diff --git a/requests/exceptions.py b/requests/exceptions.py index c0588f6..22207e3 100644 --- a/requests/exceptions.py +++ b/requests/exceptions.py @@ -9,7 +9,7 @@ This module contains the set of Requests' exceptions. """ -class RequestException(RuntimeError): +class RequestException(IOError): """There was an ambiguous exception that occurred while handling your request.""" @@ -53,3 +53,7 @@ class InvalidSchema(RequestException, ValueError): class InvalidURL(RequestException, ValueError): """ The URL provided was somehow invalid. """ + + +class ChunkedEncodingError(RequestException): + """The server declared chunked encoding but sent an invalid chunk.""" diff --git a/requests/models.py b/requests/models.py index 6cf2aaa..8fd9735 100644 --- a/requests/models.py +++ b/requests/models.py @@ -11,7 +11,7 @@ import collections import logging import datetime -from io import BytesIO +from io import BytesIO, UnsupportedOperation from .hooks import default_hooks from .structures import CaseInsensitiveDict @@ -19,14 +19,16 @@ from .auth import HTTPBasicAuth from .cookies import cookiejar_from_dict, get_cookie_header from .packages.urllib3.filepost import encode_multipart_formdata from .packages.urllib3.util import parse_url -from .exceptions import HTTPError, RequestException, MissingSchema, InvalidURL +from .exceptions import ( + HTTPError, RequestException, MissingSchema, InvalidURL, + ChunkedEncodingError) from .utils import ( guess_filename, get_auth_from_url, requote_uri, stream_decode_response_unicode, to_key_val_list, parse_header_links, - iter_slices, guess_json_utf, super_len) + iter_slices, guess_json_utf, super_len, to_native_string) from .compat import ( - cookielib, urlparse, urlunparse, urlsplit, urlencode, str, bytes, StringIO, - is_py2, chardet, json, builtin_str, basestring) + cookielib, urlunparse, urlsplit, urlencode, str, bytes, StringIO, + is_py2, chardet, json, builtin_str, basestring, IncompleteRead) CONTENT_CHUNK_SIZE = 10 * 1024 ITER_CHUNK_SIZE = 512 @@ -92,8 +94,10 @@ class RequestEncodingMixin(object): if parameters are supplied as a dict. """ - if (not files) or isinstance(data, str): - return None + if (not files): + raise ValueError("Files must be provided.") + elif isinstance(data, basestring): + raise ValueError("Data must not be a string.") new_fields = [] fields = to_key_val_list(data or {}) @@ -104,6 +108,10 @@ class RequestEncodingMixin(object): val = [val] for v in val: if v is not None: + # Don't call str() on bytestrings: in Py3 it all goes wrong. + if not isinstance(v, bytes): + v = str(v) + new_fields.append( (field.decode('utf-8') if isinstance(field, bytes) else field, v.encode('utf-8') if isinstance(v, str) else v)) @@ -139,6 +147,9 @@ class RequestHooksMixin(object): def register_hook(self, event, hook): """Properly register a hook.""" + if event not in self.hooks: + raise ValueError('Unsupported event specified, with event name "%s"' % (event)) + if isinstance(hook, collections.Callable): self.hooks[event].append(hook) elif hasattr(hook, '__iter__'): @@ -184,8 +195,8 @@ class Request(RequestHooksMixin): url=None, headers=None, files=None, - data=dict(), - params=dict(), + data=None, + params=None, auth=None, cookies=None, hooks=None): @@ -209,7 +220,6 @@ class Request(RequestHooksMixin): self.params = params self.auth = auth self.cookies = cookies - self.hooks = hooks def __repr__(self): return '' % (self.method) @@ -217,19 +227,17 @@ class Request(RequestHooksMixin): def prepare(self): """Constructs a :class:`PreparedRequest ` for transmission and returns it.""" p = PreparedRequest() - - p.prepare_method(self.method) - p.prepare_url(self.url, self.params) - p.prepare_headers(self.headers) - p.prepare_cookies(self.cookies) - p.prepare_body(self.data, self.files) - p.prepare_auth(self.auth, self.url) - # Note that prepare_auth must be last to enable authentication schemes - # such as OAuth to work on a fully prepared request. - - # This MUST go after prepare_auth. Authenticators could add a hook - p.prepare_hooks(self.hooks) - + p.prepare( + method=self.method, + url=self.url, + headers=self.headers, + files=self.files, + data=self.data, + params=self.params, + auth=self.auth, + cookies=self.cookies, + hooks=self.hooks, + ) return p @@ -264,9 +272,34 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): #: dictionary of callback hooks, for internal usage. self.hooks = default_hooks() + def prepare(self, method=None, url=None, headers=None, files=None, + data=None, params=None, auth=None, cookies=None, hooks=None): + """Prepares the the entire request with the given parameters.""" + + self.prepare_method(method) + self.prepare_url(url, params) + self.prepare_headers(headers) + self.prepare_cookies(cookies) + self.prepare_body(data, files) + self.prepare_auth(auth, url) + # Note that prepare_auth must be last to enable authentication schemes + # such as OAuth to work on a fully prepared request. + + # This MUST go after prepare_auth. Authenticators could add a hook + self.prepare_hooks(hooks) + def __repr__(self): return '' % (self.method) + def copy(self): + p = PreparedRequest() + p.method = self.method + p.url = self.url + p.headers = self.headers + p.body = self.body + p.hooks = self.hooks + return p + def prepare_method(self, method): """Prepares the given HTTP method.""" self.method = method @@ -337,8 +370,7 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): """Prepares the given HTTP headers.""" if headers: - headers = dict((name.encode('ascii'), value) for name, value in headers.items()) - self.headers = CaseInsensitiveDict(headers) + self.headers = CaseInsensitiveDict((to_native_string(name), value) for name, value in headers.items()) else: self.headers = CaseInsensitiveDict() @@ -352,7 +384,6 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): body = None content_type = None length = None - is_stream = False is_stream = all([ hasattr(data, '__iter__'), @@ -363,8 +394,8 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): try: length = super_len(data) - except (TypeError, AttributeError): - length = False + except (TypeError, AttributeError, UnsupportedOperation): + length = None if is_stream: body = data @@ -372,13 +403,10 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): if files: raise NotImplementedError('Streamed bodies and files are mutually exclusive.') - if length: + if length is not None: self.headers['Content-Length'] = str(length) else: self.headers['Transfer-Encoding'] = 'chunked' - # Check if file, fo, generator, iterator. - # If not, run through normal process. - else: # Multi-part file uploads. if files: @@ -537,11 +565,22 @@ class Response(object): return iter_slices(self._content, chunk_size) def generate(): - while 1: - chunk = self.raw.read(chunk_size, decode_content=True) - if not chunk: - break - yield chunk + try: + # Special case for urllib3. + try: + for chunk in self.raw.stream(chunk_size, + decode_content=True): + yield chunk + except IncompleteRead as e: + raise ChunkedEncodingError(e) + except AttributeError: + # Standard file-like object. + while 1: + chunk = self.raw.read(chunk_size) + if not chunk: + break + yield chunk + self._content_consumed = True gen = generate() @@ -683,4 +722,9 @@ class Response(object): raise HTTPError(http_error_msg, response=self) def close(self): + """Closes the underlying file descriptor and releases the connection + back to the pool. + + *Note: Should not normally need to be called explicitly.* + """ return self.raw.release_conn() diff --git a/requests/packages/urllib3/__init__.py b/requests/packages/urllib3/__init__.py index bff80b8..73071f7 100644 --- a/requests/packages/urllib3/__init__.py +++ b/requests/packages/urllib3/__init__.py @@ -23,7 +23,7 @@ from . import exceptions from .filepost import encode_multipart_formdata from .poolmanager import PoolManager, ProxyManager, proxy_from_url from .response import HTTPResponse -from .util import make_headers, get_host +from .util import make_headers, get_host, Timeout # Set default logging handler to avoid "No handler found" warnings. diff --git a/requests/packages/urllib3/_collections.py b/requests/packages/urllib3/_collections.py index b35a736..282b8d5 100644 --- a/requests/packages/urllib3/_collections.py +++ b/requests/packages/urllib3/_collections.py @@ -5,7 +5,7 @@ # the MIT License: http://www.opensource.org/licenses/mit-license.php from collections import MutableMapping -from threading import Lock +from threading import RLock try: # Python 2.7+ from collections import OrderedDict @@ -40,18 +40,18 @@ class RecentlyUsedContainer(MutableMapping): self.dispose_func = dispose_func self._container = self.ContainerCls() - self._lock = Lock() + self.lock = RLock() def __getitem__(self, key): # Re-insert the item, moving it to the end of the eviction line. - with self._lock: + with self.lock: item = self._container.pop(key) self._container[key] = item return item def __setitem__(self, key, value): evicted_value = _Null - with self._lock: + with self.lock: # Possibly evict the existing value of 'key' evicted_value = self._container.get(key, _Null) self._container[key] = value @@ -65,21 +65,21 @@ class RecentlyUsedContainer(MutableMapping): self.dispose_func(evicted_value) def __delitem__(self, key): - with self._lock: + with self.lock: value = self._container.pop(key) if self.dispose_func: self.dispose_func(value) def __len__(self): - with self._lock: + with self.lock: return len(self._container) def __iter__(self): raise NotImplementedError('Iteration over this class is unlikely to be threadsafe.') def clear(self): - with self._lock: + with self.lock: # Copy pointers to all values, then wipe the mapping # under Python 2, this copies the list of values twice :-| values = list(self._container.values()) @@ -90,5 +90,5 @@ class RecentlyUsedContainer(MutableMapping): self.dispose_func(value) def keys(self): - with self._lock: + with self.lock: return self._container.keys() diff --git a/requests/packages/urllib3/connectionpool.py b/requests/packages/urllib3/connectionpool.py index f3e9260..691d4e2 100644 --- a/requests/packages/urllib3/connectionpool.py +++ b/requests/packages/urllib3/connectionpool.py @@ -4,12 +4,11 @@ # This module is part of urllib3 and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php -import logging -import socket import errno +import logging from socket import error as SocketError, timeout as SocketTimeout -from .util import resolve_cert_reqs, resolve_ssl_version, assert_fingerprint +import socket try: # Python 3 from http.client import HTTPConnection, HTTPException @@ -22,11 +21,15 @@ try: # Python 3 from queue import LifoQueue, Empty, Full except ImportError: from Queue import LifoQueue, Empty, Full + import Queue as _ # Platform-specific: Windows try: # Compiled with SSL? HTTPSConnection = object - BaseSSLError = None + + class BaseSSLError(BaseException): + pass + ssl = None try: # Python 3 @@ -41,21 +44,29 @@ except (ImportError, AttributeError): # Platform-specific: No SSL. pass -from .request import RequestMethods -from .response import HTTPResponse -from .util import get_host, is_connection_dropped, ssl_wrap_socket from .exceptions import ( ClosedPoolError, + ConnectTimeoutError, EmptyPoolError, HostChangedError, MaxRetryError, SSLError, - TimeoutError, + ReadTimeoutError, + ProxyError, ) - -from .packages.ssl_match_hostname import match_hostname, CertificateError +from .packages.ssl_match_hostname import CertificateError, match_hostname from .packages import six - +from .request import RequestMethods +from .response import HTTPResponse +from .util import ( + assert_fingerprint, + get_host, + is_connection_dropped, + resolve_cert_reqs, + resolve_ssl_version, + ssl_wrap_socket, + Timeout, +) xrange = six.moves.xrange @@ -93,11 +104,24 @@ class VerifiedHTTPSConnection(HTTPSConnection): def connect(self): # Add certificate verification - sock = socket.create_connection((self.host, self.port), self.timeout) + try: + sock = socket.create_connection( + address=(self.host, self.port), + timeout=self.timeout) + except SocketTimeout: + raise ConnectTimeoutError( + self, "Connection to %s timed out. (connect timeout=%s)" % + (self.host, self.timeout)) resolved_cert_reqs = resolve_cert_reqs(self.cert_reqs) resolved_ssl_version = resolve_ssl_version(self.ssl_version) + if self._tunnel_host: + self.sock = sock + # Calls self._set_hostport(), so self.host is + # self._tunnel_host below. + self._tunnel() + # Wrap socket using verification with the root certs in # trusted_root_certs self.sock = ssl_wrap_socket(sock, self.key_file, self.cert_file, @@ -110,10 +134,11 @@ class VerifiedHTTPSConnection(HTTPSConnection): if self.assert_fingerprint: assert_fingerprint(self.sock.getpeercert(binary_form=True), self.assert_fingerprint) - else: + elif self.assert_hostname is not False: match_hostname(self.sock.getpeercert(), self.assert_hostname or self.host) + ## Pool objects class ConnectionPool(object): @@ -126,6 +151,9 @@ class ConnectionPool(object): QueueCls = LifoQueue def __init__(self, host, port=None): + # httplib doesn't like it when we include brackets in ipv6 addresses + host = host.strip('[]') + self.host = host self.port = port @@ -133,6 +161,8 @@ class ConnectionPool(object): return '%s(host=%r, port=%r)' % (type(self).__name__, self.host, self.port) +# This is taken from http://hg.python.org/cpython/file/7aaba721ebc0/Lib/socket.py#l252 +_blocking_errnos = set([errno.EAGAIN, errno.EWOULDBLOCK]) class HTTPConnectionPool(ConnectionPool, RequestMethods): """ @@ -151,9 +181,15 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): as a valid HTTP/1.0 or 1.1 status line, passed into :class:`httplib.HTTPConnection`. + .. note:: + Only works in Python 2. This parameter is ignored in Python 3. + :param timeout: - Socket timeout for each individual connection, can be a float. None - disables timeout. + Socket timeout in seconds for each individual connection. This can + be a float or integer, which sets the timeout for the HTTP request, + or an instance of :class:`urllib3.util.Timeout` which gives you more + fine-grained control over request timeouts. After the constructor has + been parsed, this is always a `urllib3.util.Timeout` object. :param maxsize: Number of connections to save that can be reused. More than 1 is useful @@ -171,20 +207,39 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): :param headers: Headers to include with all requests, unless other headers are given explicitly. + + :param _proxy: + Parsed proxy URL, should not be used directly, instead, see + :class:`urllib3.connectionpool.ProxyManager`" + + :param _proxy_headers: + A dictionary with proxy headers, should not be used directly, + instead, see :class:`urllib3.connectionpool.ProxyManager`" """ scheme = 'http' - def __init__(self, host, port=None, strict=False, timeout=None, maxsize=1, - block=False, headers=None): + def __init__(self, host, port=None, strict=False, + timeout=Timeout.DEFAULT_TIMEOUT, maxsize=1, block=False, + headers=None, _proxy=None, _proxy_headers=None): ConnectionPool.__init__(self, host, port) RequestMethods.__init__(self, headers) self.strict = strict + + # This is for backwards compatibility and can be removed once a timeout + # can only be set to a Timeout object + if not isinstance(timeout, Timeout): + timeout = Timeout.from_float(timeout) + self.timeout = timeout + self.pool = self.QueueCls(maxsize) self.block = block + self.proxy = _proxy + self.proxy_headers = _proxy_headers or {} + # Fill the queue up so that doing get() on it will block properly for _ in xrange(maxsize): self.pool.put(None) @@ -200,9 +255,14 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): self.num_connections += 1 log.info("Starting new HTTP connection (%d): %s" % (self.num_connections, self.host)) - return HTTPConnection(host=self.host, - port=self.port, - strict=self.strict) + extra_params = {} + if not six.PY3: # Python 2 + extra_params['strict'] = self.strict + + return HTTPConnection(host=self.host, port=self.port, + timeout=self.timeout.connect_timeout, + **extra_params) + def _get_conn(self, timeout=None): """ @@ -263,31 +323,89 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): % self.host) # Connection never got put back into the pool, close it. - conn.close() + if conn: + conn.close() + + def _get_timeout(self, timeout): + """ Helper that always returns a :class:`urllib3.util.Timeout` """ + if timeout is _Default: + return self.timeout.clone() + + if isinstance(timeout, Timeout): + return timeout.clone() + else: + # User passed us an int/float. This is for backwards compatibility, + # can be removed later + return Timeout.from_float(timeout) def _make_request(self, conn, method, url, timeout=_Default, **httplib_request_kw): """ Perform a request on a given httplib connection object taken from our pool. + + :param conn: + a connection from one of our connection pools + + :param timeout: + Socket timeout in seconds for the request. This can be a + float or integer, which will set the same timeout value for + the socket connect and the socket read, or an instance of + :class:`urllib3.util.Timeout`, which gives you more fine-grained + control over your timeouts. """ self.num_requests += 1 - if timeout is _Default: - timeout = self.timeout - - conn.timeout = timeout # This only does anything in Py26+ - conn.request(method, url, **httplib_request_kw) + timeout_obj = self._get_timeout(timeout) - # Set timeout - sock = getattr(conn, 'sock', False) # AppEngine doesn't have sock attr. - if sock: - sock.settimeout(timeout) + try: + timeout_obj.start_connect() + conn.timeout = timeout_obj.connect_timeout + # conn.request() calls httplib.*.request, not the method in + # request.py. It also calls makefile (recv) on the socket + conn.request(method, url, **httplib_request_kw) + except SocketTimeout: + raise ConnectTimeoutError( + self, "Connection to %s timed out. (connect timeout=%s)" % + (self.host, timeout_obj.connect_timeout)) + + # Reset the timeout for the recv() on the socket + read_timeout = timeout_obj.read_timeout + log.debug("Setting read timeout to %s" % read_timeout) + # App Engine doesn't have a sock attr + if hasattr(conn, 'sock') and \ + read_timeout is not None and \ + read_timeout is not Timeout.DEFAULT_TIMEOUT: + # In Python 3 socket.py will catch EAGAIN and return None when you + # try and read into the file pointer created by http.client, which + # instead raises a BadStatusLine exception. Instead of catching + # the exception and assuming all BadStatusLine exceptions are read + # timeouts, check for a zero timeout before making the request. + if read_timeout == 0: + raise ReadTimeoutError( + self, url, + "Read timed out. (read timeout=%s)" % read_timeout) + conn.sock.settimeout(read_timeout) + + # Receive the response from the server + try: + try: # Python 2.7+, use buffering of HTTP responses + httplib_response = conn.getresponse(buffering=True) + except TypeError: # Python 2.6 and older + httplib_response = conn.getresponse() + except SocketTimeout: + raise ReadTimeoutError( + self, url, "Read timed out. (read timeout=%s)" % read_timeout) + + except SocketError as e: # Platform-specific: Python 2 + # See the above comment about EAGAIN in Python 3. In Python 2 we + # have to specifically catch it and throw the timeout error + if e.errno in _blocking_errnos: + raise ReadTimeoutError( + self, url, + "Read timed out. (read timeout=%s)" % read_timeout) + raise - try: # Python 2.7+, use buffering of HTTP responses - httplib_response = conn.getresponse(buffering=True) - except TypeError: # Python 2.6 and older - httplib_response = conn.getresponse() # AppEngine doesn't have a version attr. http_version = getattr(conn, '_http_vsn_str', 'HTTP/?') @@ -367,7 +485,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): :param redirect: If True, automatically handle redirects (status codes 301, 302, - 303, 307). Each redirect counts as a retry. + 303, 307, 308). Each redirect counts as a retry. :param assert_same_host: If ``True``, will make sure that the host of the pool requests is @@ -375,7 +493,9 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): use the pool on an HTTP proxy and request foreign hosts. :param timeout: - If specified, overrides the default timeout for this one request. + If specified, overrides the default timeout for this one + request. It may be a float (in seconds) or an instance of + :class:`urllib3.util.Timeout`. :param pool_timeout: If set and the pool is set to block=True, then this method will @@ -402,18 +522,11 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): if retries < 0: raise MaxRetryError(self, url) - if timeout is _Default: - timeout = self.timeout - if release_conn is None: release_conn = response_kw.get('preload_content', True) # Check host if assert_same_host and not self.is_same_host(url): - host = "%s://%s" % (self.scheme, self.host) - if self.port: - host = "%s:%d" % (host, self.port) - raise HostChangedError(self, url, retries - 1) conn = None @@ -444,20 +557,20 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): # ``response.release_conn()`` is called (implicitly by # ``response.read()``) - except Empty as e: + except Empty: # Timed out by queue - raise TimeoutError(self, url, - "Request timed out. (pool_timeout=%s)" % - pool_timeout) + raise ReadTimeoutError( + self, url, "Read timed out, no pool connections are available.") - except SocketTimeout as e: + except SocketTimeout: # Timed out by socket - raise TimeoutError(self, url, - "Request timed out. (timeout=%s)" % - timeout) + raise ReadTimeoutError(self, url, "Read timed out.") except BaseSSLError as e: # SSL certificate error + if 'timed out' in str(e) or \ + 'did not complete (read)' in str(e): # Platform-specific: Python 2.6 + raise ReadTimeoutError(self, url, "Read timed out.") raise SSLError(e) except CertificateError as e: @@ -465,6 +578,10 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): raise SSLError(e) except (HTTPException, SocketError) as e: + if isinstance(e, SocketError) and self.proxy is not None: + raise ProxyError('Cannot connect to proxy. ' + 'Socket error: %s.' % e) + # Connection broken, discard. It will be replaced next _get_conn(). conn = None # This is necessary so we can access e below @@ -513,6 +630,7 @@ class HTTPSConnectionPool(HTTPConnectionPool): :class:`.VerifiedHTTPSConnection` uses one of ``assert_fingerprint``, ``assert_hostname`` and ``host`` in this order to verify connections. + If ``assert_hostname`` is False, no verification is done. The ``key_file``, ``cert_file``, ``cert_reqs``, ``ca_certs`` and ``ssl_version`` are only used if :mod:`ssl` is available and are fed into @@ -525,13 +643,13 @@ class HTTPSConnectionPool(HTTPConnectionPool): def __init__(self, host, port=None, strict=False, timeout=None, maxsize=1, block=False, headers=None, + _proxy=None, _proxy_headers=None, key_file=None, cert_file=None, cert_reqs=None, ca_certs=None, ssl_version=None, assert_hostname=None, assert_fingerprint=None): - HTTPConnectionPool.__init__(self, host, port, - strict, timeout, maxsize, - block, headers) + HTTPConnectionPool.__init__(self, host, port, strict, timeout, maxsize, + block, headers, _proxy, _proxy_headers) self.key_file = key_file self.cert_file = cert_file self.cert_reqs = cert_reqs @@ -540,6 +658,34 @@ class HTTPSConnectionPool(HTTPConnectionPool): self.assert_hostname = assert_hostname self.assert_fingerprint = assert_fingerprint + def _prepare_conn(self, connection): + """ + Prepare the ``connection`` for :meth:`urllib3.util.ssl_wrap_socket` + and establish the tunnel if proxy is used. + """ + + if isinstance(connection, VerifiedHTTPSConnection): + connection.set_cert(key_file=self.key_file, + cert_file=self.cert_file, + cert_reqs=self.cert_reqs, + ca_certs=self.ca_certs, + assert_hostname=self.assert_hostname, + assert_fingerprint=self.assert_fingerprint) + connection.ssl_version = self.ssl_version + + if self.proxy is not None: + # Python 2.7+ + try: + set_tunnel = connection.set_tunnel + except AttributeError: # Platform-specific: Python 2.6 + set_tunnel = connection._set_tunnel + set_tunnel(self.host, self.port, self.proxy_headers) + # Establish tunnel connection early, because otherwise httplib + # would improperly set Host: header to proxy's IP:port. + connection.connect() + + return connection + def _new_conn(self): """ Return a fresh :class:`httplib.HTTPSConnection`. @@ -548,26 +694,28 @@ class HTTPSConnectionPool(HTTPConnectionPool): log.info("Starting new HTTPS connection (%d): %s" % (self.num_connections, self.host)) + actual_host = self.host + actual_port = self.port + if self.proxy is not None: + actual_host = self.proxy.host + actual_port = self.proxy.port + if not ssl: # Platform-specific: Python compiled without +ssl if not HTTPSConnection or HTTPSConnection is object: raise SSLError("Can't connect to HTTPS URL because the SSL " "module is not available.") - - return HTTPSConnection(host=self.host, - port=self.port, - strict=self.strict) - - connection = VerifiedHTTPSConnection(host=self.host, - port=self.port, - strict=self.strict) - connection.set_cert(key_file=self.key_file, cert_file=self.cert_file, - cert_reqs=self.cert_reqs, ca_certs=self.ca_certs, - assert_hostname=self.assert_hostname, - assert_fingerprint=self.assert_fingerprint) - - connection.ssl_version = self.ssl_version - - return connection + connection_class = HTTPSConnection + else: + connection_class = VerifiedHTTPSConnection + + extra_params = {} + if not six.PY3: # Python 2 + extra_params['strict'] = self.strict + connection = connection_class(host=actual_host, port=actual_port, + timeout=self.timeout.connect_timeout, + **extra_params) + + return self._prepare_conn(connection) def connection_from_url(url, **kw): diff --git a/requests/packages/urllib3/contrib/ntlmpool.py b/requests/packages/urllib3/contrib/ntlmpool.py index 277ee0b..b8cd933 100644 --- a/requests/packages/urllib3/contrib/ntlmpool.py +++ b/requests/packages/urllib3/contrib/ntlmpool.py @@ -33,7 +33,7 @@ class NTLMConnectionPool(HTTPSConnectionPool): def __init__(self, user, pw, authurl, *args, **kwargs): """ authurl is a random URL on the server that is protected by NTLM. - user is the Windows user, probably in the DOMAIN\username format. + user is the Windows user, probably in the DOMAIN\\username format. pw is the password for the user. """ super(NTLMConnectionPool, self).__init__(*args, **kwargs) diff --git a/requests/packages/urllib3/contrib/pyopenssl.py b/requests/packages/urllib3/contrib/pyopenssl.py index 5c4c6d8..d43bcd6 100644 --- a/requests/packages/urllib3/contrib/pyopenssl.py +++ b/requests/packages/urllib3/contrib/pyopenssl.py @@ -20,13 +20,13 @@ Now you can use :mod:`urllib3` as you normally would, and it will support SNI when the required modules are installed. ''' -from ndg.httpsclient.ssl_peer_verification import (ServerSSLCertVerification, - SUBJ_ALT_NAME_SUPPORT) +from ndg.httpsclient.ssl_peer_verification import SUBJ_ALT_NAME_SUPPORT from ndg.httpsclient.subj_alt_name import SubjectAltName import OpenSSL.SSL from pyasn1.codec.der import decoder as der_decoder from socket import _fileobject import ssl +from cStringIO import StringIO from .. import connectionpool from .. import util @@ -99,6 +99,172 @@ def get_subj_alt_name(peer_cert): return dns_name +class fileobject(_fileobject): + + def read(self, size=-1): + # Use max, disallow tiny reads in a loop as they are very inefficient. + # We never leave read() with any leftover data from a new recv() call + # in our internal buffer. + rbufsize = max(self._rbufsize, self.default_bufsize) + # Our use of StringIO rather than lists of string objects returned by + # recv() minimizes memory usage and fragmentation that occurs when + # rbufsize is large compared to the typical return value of recv(). + buf = self._rbuf + buf.seek(0, 2) # seek end + if size < 0: + # Read until EOF + self._rbuf = StringIO() # reset _rbuf. we consume it via buf. + while True: + try: + data = self._sock.recv(rbufsize) + except OpenSSL.SSL.WantReadError: + continue + if not data: + break + buf.write(data) + return buf.getvalue() + else: + # Read until size bytes or EOF seen, whichever comes first + buf_len = buf.tell() + if buf_len >= size: + # Already have size bytes in our buffer? Extract and return. + buf.seek(0) + rv = buf.read(size) + self._rbuf = StringIO() + self._rbuf.write(buf.read()) + return rv + + self._rbuf = StringIO() # reset _rbuf. we consume it via buf. + while True: + left = size - buf_len + # recv() will malloc the amount of memory given as its + # parameter even though it often returns much less data + # than that. The returned data string is short lived + # as we copy it into a StringIO and free it. This avoids + # fragmentation issues on many platforms. + try: + data = self._sock.recv(left) + except OpenSSL.SSL.WantReadError: + continue + if not data: + break + n = len(data) + if n == size and not buf_len: + # Shortcut. Avoid buffer data copies when: + # - We have no data in our buffer. + # AND + # - Our call to recv returned exactly the + # number of bytes we were asked to read. + return data + if n == left: + buf.write(data) + del data # explicit free + break + assert n <= left, "recv(%d) returned %d bytes" % (left, n) + buf.write(data) + buf_len += n + del data # explicit free + #assert buf_len == buf.tell() + return buf.getvalue() + + def readline(self, size=-1): + buf = self._rbuf + buf.seek(0, 2) # seek end + if buf.tell() > 0: + # check if we already have it in our buffer + buf.seek(0) + bline = buf.readline(size) + if bline.endswith('\n') or len(bline) == size: + self._rbuf = StringIO() + self._rbuf.write(buf.read()) + return bline + del bline + if size < 0: + # Read until \n or EOF, whichever comes first + if self._rbufsize <= 1: + # Speed up unbuffered case + buf.seek(0) + buffers = [buf.read()] + self._rbuf = StringIO() # reset _rbuf. we consume it via buf. + data = None + recv = self._sock.recv + while True: + try: + while data != "\n": + data = recv(1) + if not data: + break + buffers.append(data) + except OpenSSL.SSL.WantReadError: + continue + break + return "".join(buffers) + + buf.seek(0, 2) # seek end + self._rbuf = StringIO() # reset _rbuf. we consume it via buf. + while True: + try: + data = self._sock.recv(self._rbufsize) + except OpenSSL.SSL.WantReadError: + continue + if not data: + break + nl = data.find('\n') + if nl >= 0: + nl += 1 + buf.write(data[:nl]) + self._rbuf.write(data[nl:]) + del data + break + buf.write(data) + return buf.getvalue() + else: + # Read until size bytes or \n or EOF seen, whichever comes first + buf.seek(0, 2) # seek end + buf_len = buf.tell() + if buf_len >= size: + buf.seek(0) + rv = buf.read(size) + self._rbuf = StringIO() + self._rbuf.write(buf.read()) + return rv + self._rbuf = StringIO() # reset _rbuf. we consume it via buf. + while True: + try: + data = self._sock.recv(self._rbufsize) + except OpenSSL.SSL.WantReadError: + continue + if not data: + break + left = size - buf_len + # did we just receive a newline? + nl = data.find('\n', 0, left) + if nl >= 0: + nl += 1 + # save the excess data to _rbuf + self._rbuf.write(data[nl:]) + if buf_len: + buf.write(data[:nl]) + break + else: + # Shortcut. Avoid data copy through buf when returning + # a substring of our first recv(). + return data[:nl] + n = len(data) + if n == size and not buf_len: + # Shortcut. Avoid data copy through buf when + # returning exactly all of our first recv(). + return data + if n >= left: + buf.write(data[:left]) + self._rbuf.write(data[left:]) + break + buf.write(data) + buf_len += n + #assert buf_len == buf.tell() + return buf.getvalue() + + class WrappedSocket(object): '''API-compatibility wrapper for Python OpenSSL's Connection-class.''' @@ -106,8 +272,11 @@ class WrappedSocket(object): self.connection = connection self.socket = socket + def fileno(self): + return self.socket.fileno() + def makefile(self, mode, bufsize=-1): - return _fileobject(self.connection, mode, bufsize) + return fileobject(self.connection, mode, bufsize) def settimeout(self, timeout): return self.socket.settimeout(timeout) @@ -115,10 +284,14 @@ class WrappedSocket(object): def sendall(self, data): return self.connection.sendall(data) + def close(self): + return self.connection.shutdown() + def getpeercert(self, binary_form=False): x509 = self.connection.get_peer_certificate() + if not x509: - raise ssl.SSLError('') + return x509 if binary_form: return OpenSSL.crypto.dump_certificate( @@ -159,9 +332,13 @@ def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, cnx = OpenSSL.SSL.Connection(ctx, sock) cnx.set_tlsext_host_name(server_hostname) cnx.set_connect_state() - try: - cnx.do_handshake() - except OpenSSL.SSL.Error as e: - raise ssl.SSLError('bad handshake', e) + while True: + try: + cnx.do_handshake() + except OpenSSL.SSL.WantReadError: + continue + except OpenSSL.SSL.Error as e: + raise ssl.SSLError('bad handshake', e) + break return WrappedSocket(cnx, sock) diff --git a/requests/packages/urllib3/exceptions.py b/requests/packages/urllib3/exceptions.py index 2e2a259..98ef9ab 100644 --- a/requests/packages/urllib3/exceptions.py +++ b/requests/packages/urllib3/exceptions.py @@ -39,6 +39,11 @@ class SSLError(HTTPError): pass +class ProxyError(HTTPError): + "Raised when the connection to a proxy fails." + pass + + class DecodeError(HTTPError): "Raised when automatic decoding based on Content-Type fails." pass @@ -70,8 +75,29 @@ class HostChangedError(RequestError): self.retries = retries -class TimeoutError(RequestError): - "Raised when a socket timeout occurs." +class TimeoutStateError(HTTPError): + """ Raised when passing an invalid state to a timeout """ + pass + + +class TimeoutError(HTTPError): + """ Raised when a socket timeout error occurs. + + Catching this error will catch both :exc:`ReadTimeoutErrors + ` and :exc:`ConnectTimeoutErrors `. + """ + pass + + +class ReadTimeoutError(TimeoutError, RequestError): + "Raised when a socket timeout occurs while receiving data from a server" + pass + + +# This timeout error does not have a URL attached and needs to inherit from the +# base HTTPError +class ConnectTimeoutError(TimeoutError): + "Raised when a socket timeout occurs while connecting to a server" pass diff --git a/requests/packages/urllib3/fields.py b/requests/packages/urllib3/fields.py new file mode 100644 index 0000000..ed01765 --- /dev/null +++ b/requests/packages/urllib3/fields.py @@ -0,0 +1,177 @@ +# urllib3/fields.py +# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# +# This module is part of urllib3 and is released under +# the MIT License: http://www.opensource.org/licenses/mit-license.php + +import email.utils +import mimetypes + +from .packages import six + + +def guess_content_type(filename, default='application/octet-stream'): + """ + Guess the "Content-Type" of a file. + + :param filename: + The filename to guess the "Content-Type" of using :mod:`mimetimes`. + :param default: + If no "Content-Type" can be guessed, default to `default`. + """ + if filename: + return mimetypes.guess_type(filename)[0] or default + return default + + +def format_header_param(name, value): + """ + Helper function to format and quote a single header parameter. + + Particularly useful for header parameters which might contain + non-ASCII values, like file names. This follows RFC 2231, as + suggested by RFC 2388 Section 4.4. + + :param name: + The name of the parameter, a string expected to be ASCII only. + :param value: + The value of the parameter, provided as a unicode string. + """ + if not any(ch in value for ch in '"\\\r\n'): + result = '%s="%s"' % (name, value) + try: + result.encode('ascii') + except UnicodeEncodeError: + pass + else: + return result + if not six.PY3: # Python 2: + value = value.encode('utf-8') + value = email.utils.encode_rfc2231(value, 'utf-8') + value = '%s*=%s' % (name, value) + return value + + +class RequestField(object): + """ + A data container for request body parameters. + + :param name: + The name of this request field. + :param data: + The data/value body. + :param filename: + An optional filename of the request field. + :param headers: + An optional dict-like object of headers to initially use for the field. + """ + def __init__(self, name, data, filename=None, headers=None): + self._name = name + self._filename = filename + self.data = data + self.headers = {} + if headers: + self.headers = dict(headers) + + @classmethod + def from_tuples(cls, fieldname, value): + """ + A :class:`~urllib3.fields.RequestField` factory from old-style tuple parameters. + + Supports constructing :class:`~urllib3.fields.RequestField` from parameter + of key/value strings AND key/filetuple. A filetuple is a (filename, data, MIME type) + tuple where the MIME type is optional. For example: :: + + 'foo': 'bar', + 'fakefile': ('foofile.txt', 'contents of foofile'), + 'realfile': ('barfile.txt', open('realfile').read()), + 'typedfile': ('bazfile.bin', open('bazfile').read(), 'image/jpeg'), + 'nonamefile': 'contents of nonamefile field', + + Field names and filenames must be unicode. + """ + if isinstance(value, tuple): + if len(value) == 3: + filename, data, content_type = value + else: + filename, data = value + content_type = guess_content_type(filename) + else: + filename = None + content_type = None + data = value + + request_param = cls(fieldname, data, filename=filename) + request_param.make_multipart(content_type=content_type) + + return request_param + + def _render_part(self, name, value): + """ + Overridable helper function to format a single header parameter. + + :param name: + The name of the parameter, a string expected to be ASCII only. + :param value: + The value of the parameter, provided as a unicode string. + """ + return format_header_param(name, value) + + def _render_parts(self, header_parts): + """ + Helper function to format and quote a single header. + + Useful for single headers that are composed of multiple items. E.g., + 'Content-Disposition' fields. + + :param header_parts: + A sequence of (k, v) typles or a :class:`dict` of (k, v) to format as + `k1="v1"; k2="v2"; ...`. + """ + parts = [] + iterable = header_parts + if isinstance(header_parts, dict): + iterable = header_parts.items() + + for name, value in iterable: + if value: + parts.append(self._render_part(name, value)) + + return '; '.join(parts) + + def render_headers(self): + """ + Renders the headers for this request field. + """ + lines = [] + + sort_keys = ['Content-Disposition', 'Content-Type', 'Content-Location'] + for sort_key in sort_keys: + if self.headers.get(sort_key, False): + lines.append('%s: %s' % (sort_key, self.headers[sort_key])) + + for header_name, header_value in self.headers.items(): + if header_name not in sort_keys: + if header_value: + lines.append('%s: %s' % (header_name, header_value)) + + lines.append('\r\n') + return '\r\n'.join(lines) + + def make_multipart(self, content_disposition=None, content_type=None, content_location=None): + """ + Makes this request field into a multipart request field. + + This method overrides "Content-Disposition", "Content-Type" and + "Content-Location" headers to the request parameter. + + :param content_type: + The 'Content-Type' of the request body. + :param content_location: + The 'Content-Location' of the request body. + + """ + self.headers['Content-Disposition'] = content_disposition or 'form-data' + self.headers['Content-Disposition'] += '; '.join(['', self._render_parts((('name', self._name), ('filename', self._filename)))]) + self.headers['Content-Type'] = content_type + self.headers['Content-Location'] = content_location diff --git a/requests/packages/urllib3/filepost.py b/requests/packages/urllib3/filepost.py index 470309a..4575582 100644 --- a/requests/packages/urllib3/filepost.py +++ b/requests/packages/urllib3/filepost.py @@ -1,5 +1,5 @@ # urllib3/filepost.py -# Copyright 2008-2012 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) # # This module is part of urllib3 and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php @@ -12,6 +12,7 @@ from io import BytesIO from .packages import six from .packages.six import b +from .fields import RequestField writer = codecs.lookup('utf-8')[3] @@ -23,15 +24,38 @@ def choose_boundary(): return uuid4().hex -def get_content_type(filename): - return mimetypes.guess_type(filename)[0] or 'application/octet-stream' +def iter_field_objects(fields): + """ + Iterate over fields. + + Supports list of (k, v) tuples and dicts, and lists of + :class:`~urllib3.fields.RequestField`. + + """ + if isinstance(fields, dict): + i = six.iteritems(fields) + else: + i = iter(fields) + + for field in i: + if isinstance(field, RequestField): + yield field + else: + yield RequestField.from_tuples(*field) def iter_fields(fields): """ Iterate over fields. + .. deprecated :: + + The addition of `~urllib3.fields.RequestField` makes this function + obsolete. Instead, use :func:`iter_field_objects`, which returns + `~urllib3.fields.RequestField` objects, instead. + Supports list of (k, v) tuples and dicts. + """ if isinstance(fields, dict): return ((k, v) for k, v in six.iteritems(fields)) @@ -44,15 +68,7 @@ def encode_multipart_formdata(fields, boundary=None): Encode a dictionary of ``fields`` using the multipart/form-data MIME format. :param fields: - Dictionary of fields or list of (key, value) or (key, value, MIME type) - field tuples. The key is treated as the field name, and the value as - the body of the form-data bytes. If the value is a tuple of two - elements, then the first element is treated as the filename of the - form-data section and a suitable MIME type is guessed based on the - filename. If the value is a tuple of three elements, then the third - element is treated as an explicit MIME type of the form-data section. - - Field names and filenames must be unicode. + Dictionary of fields or list of (key, :class:`~urllib3.fields.RequestField`). :param boundary: If not specified, then a random boundary will be generated using @@ -62,24 +78,11 @@ def encode_multipart_formdata(fields, boundary=None): if boundary is None: boundary = choose_boundary() - for fieldname, value in iter_fields(fields): + for field in iter_field_objects(fields): body.write(b('--%s\r\n' % (boundary))) - if isinstance(value, tuple): - if len(value) == 3: - filename, data, content_type = value - else: - filename, data = value - content_type = get_content_type(filename) - writer(body).write('Content-Disposition: form-data; name="%s"; ' - 'filename="%s"\r\n' % (fieldname, filename)) - body.write(b('Content-Type: %s\r\n\r\n' % - (content_type,))) - else: - data = value - writer(body).write('Content-Disposition: form-data; name="%s"\r\n' - % (fieldname)) - body.write(b'\r\n') + writer(body).write(field.render_headers()) + data = field.data if isinstance(data, int): data = str(data) # Backwards compatibility diff --git a/requests/packages/urllib3/packages/ssl_match_hostname/__init__.py b/requests/packages/urllib3/packages/ssl_match_hostname/__init__.py index 9560b04..2d61ac2 100644 --- a/requests/packages/urllib3/packages/ssl_match_hostname/__init__.py +++ b/requests/packages/urllib3/packages/ssl_match_hostname/__init__.py @@ -7,23 +7,60 @@ __version__ = '3.2.2' class CertificateError(ValueError): pass -def _dnsname_to_pat(dn): +def _dnsname_match(dn, hostname, max_wildcards=1): + """Matching according to RFC 6125, section 6.4.3 + + http://tools.ietf.org/html/rfc6125#section-6.4.3 + """ pats = [] - for frag in dn.split(r'.'): - if frag == '*': - # When '*' is a fragment by itself, it matches a non-empty dotless - # fragment. - pats.append('[^.]+') - else: - # Otherwise, '*' matches any dotless fragment. - frag = re.escape(frag) - pats.append(frag.replace(r'\*', '[^.]*')) - return re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE) + if not dn: + return False + + parts = dn.split(r'.') + leftmost = parts[0] + + wildcards = leftmost.count('*') + if wildcards > max_wildcards: + # Issue #17980: avoid denials of service by refusing more + # than one wildcard per fragment. A survery of established + # policy among SSL implementations showed it to be a + # reasonable choice. + raise CertificateError( + "too many wildcards in certificate DNS name: " + repr(dn)) + + # speed up common case w/o wildcards + if not wildcards: + return dn.lower() == hostname.lower() + + # RFC 6125, section 6.4.3, subitem 1. + # The client SHOULD NOT attempt to match a presented identifier in which + # the wildcard character comprises a label other than the left-most label. + if leftmost == '*': + # When '*' is a fragment by itself, it matches a non-empty dotless + # fragment. + pats.append('[^.]+') + elif leftmost.startswith('xn--') or hostname.startswith('xn--'): + # RFC 6125, section 6.4.3, subitem 3. + # The client SHOULD NOT attempt to match a presented identifier + # where the wildcard character is embedded within an A-label or + # U-label of an internationalized domain name. + pats.append(re.escape(leftmost)) + else: + # Otherwise, '*' matches any dotless string, e.g. www* + pats.append(re.escape(leftmost).replace(r'\*', '[^.]*')) + + # add the remaining fragments, ignore any wildcards + for frag in parts[1:]: + pats.append(re.escape(frag)) + + pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE) + return pat.match(hostname) + def match_hostname(cert, hostname): """Verify that *cert* (in decoded format as returned by - SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 rules - are mostly followed, but IP addresses are not accepted for *hostname*. + SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 and RFC 6125 + rules are followed, but IP addresses are not accepted for *hostname*. CertificateError is raised on failure. On success, the function returns nothing. @@ -34,7 +71,7 @@ def match_hostname(cert, hostname): san = cert.get('subjectAltName', ()) for key, value in san: if key == 'DNS': - if _dnsname_to_pat(value).match(hostname): + if _dnsname_match(value, hostname): return dnsnames.append(value) if not dnsnames: @@ -45,7 +82,7 @@ def match_hostname(cert, hostname): # XXX according to RFC 2818, the most specific Common Name # must be used. if key == 'commonName': - if _dnsname_to_pat(value).match(hostname): + if _dnsname_match(value, hostname): return dnsnames.append(value) if len(dnsnames) > 1: diff --git a/requests/packages/urllib3/poolmanager.py b/requests/packages/urllib3/poolmanager.py index ce0c248..e7f8667 100644 --- a/requests/packages/urllib3/poolmanager.py +++ b/requests/packages/urllib3/poolmanager.py @@ -6,9 +6,14 @@ import logging +try: # Python 3 + from urllib.parse import urljoin +except ImportError: + from urlparse import urljoin + from ._collections import RecentlyUsedContainer from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool -from .connectionpool import connection_from_url, port_by_scheme +from .connectionpool import port_by_scheme from .request import RequestMethods from .util import parse_url @@ -55,6 +60,8 @@ class PoolManager(RequestMethods): """ + proxy = None + def __init__(self, num_pools=10, headers=None, **connection_pool_kw): RequestMethods.__init__(self, headers) self.connection_pool_kw = connection_pool_kw @@ -94,20 +101,23 @@ class PoolManager(RequestMethods): If ``port`` isn't given, it will be derived from the ``scheme`` using ``urllib3.connectionpool.port_by_scheme``. """ + scheme = scheme or 'http' + port = port or port_by_scheme.get(scheme, 80) pool_key = (scheme, host, port) - # If the scheme, host, or port doesn't match existing open connections, - # open a new ConnectionPool. - pool = self.pools.get(pool_key) - if pool: - return pool + with self.pools.lock: + # If the scheme, host, or port doesn't match existing open + # connections, open a new ConnectionPool. + pool = self.pools.get(pool_key) + if pool: + return pool - # Make a fresh ConnectionPool of the desired type - pool = self._new_pool(scheme, host, port) - self.pools[pool_key] = pool + # Make a fresh ConnectionPool of the desired type + pool = self._new_pool(scheme, host, port) + self.pools[pool_key] = pool return pool def connection_from_url(self, url): @@ -139,12 +149,19 @@ class PoolManager(RequestMethods): if 'headers' not in kw: kw['headers'] = self.headers - response = conn.urlopen(method, u.request_uri, **kw) + if self.proxy is not None and u.scheme == "http": + response = conn.urlopen(method, url, **kw) + else: + response = conn.urlopen(method, u.request_uri, **kw) redirect_location = redirect and response.get_redirect_location() if not redirect_location: return response + # Support relative URLs for redirecting. + redirect_location = urljoin(url, redirect_location) + + # RFC 2616, Section 10.3.4 if response.status == 303: method = 'GET' @@ -154,15 +171,59 @@ class PoolManager(RequestMethods): return self.urlopen(method, redirect_location, **kw) -class ProxyManager(RequestMethods): +class ProxyManager(PoolManager): """ - Given a ConnectionPool to a proxy, the ProxyManager's ``urlopen`` method - will make requests to any url through the defined proxy. The ProxyManager - class will automatically set the 'Host' header if it is not provided. + Behaves just like :class:`PoolManager`, but sends all requests through + the defined proxy, using the CONNECT method for HTTPS URLs. + + :param poxy_url: + The URL of the proxy to be used. + + :param proxy_headers: + A dictionary contaning headers that will be sent to the proxy. In case + of HTTP they are being sent with each request, while in the + HTTPS/CONNECT case they are sent only once. Could be used for proxy + authentication. + + Example: + >>> proxy = urllib3.ProxyManager('http://localhost:3128/') + >>> r1 = proxy.request('GET', 'http://google.com/') + >>> r2 = proxy.request('GET', 'http://httpbin.org/') + >>> len(proxy.pools) + 1 + >>> r3 = proxy.request('GET', 'https://httpbin.org/') + >>> r4 = proxy.request('GET', 'https://twitter.com/') + >>> len(proxy.pools) + 3 + """ - def __init__(self, proxy_pool): - self.proxy_pool = proxy_pool + def __init__(self, proxy_url, num_pools=10, headers=None, + proxy_headers=None, **connection_pool_kw): + + if isinstance(proxy_url, HTTPConnectionPool): + proxy_url = '%s://%s:%i' % (proxy_url.scheme, proxy_url.host, + proxy_url.port) + proxy = parse_url(proxy_url) + if not proxy.port: + port = port_by_scheme.get(proxy.scheme, 80) + proxy = proxy._replace(port=port) + self.proxy = proxy + self.proxy_headers = proxy_headers or {} + assert self.proxy.scheme in ("http", "https"), \ + 'Not supported proxy scheme %s' % self.proxy.scheme + connection_pool_kw['_proxy'] = self.proxy + connection_pool_kw['_proxy_headers'] = self.proxy_headers + super(ProxyManager, self).__init__( + num_pools, headers, **connection_pool_kw) + + def connection_from_host(self, host, port=None, scheme='http'): + if scheme == "https": + return super(ProxyManager, self).connection_from_host( + host, port, scheme) + + return super(ProxyManager, self).connection_from_host( + self.proxy.host, self.proxy.port, self.proxy.scheme) def _set_proxy_headers(self, url, headers=None): """ @@ -171,22 +232,28 @@ class ProxyManager(RequestMethods): """ headers_ = {'Accept': '*/*'} - host = parse_url(url).host - if host: - headers_['Host'] = host + netloc = parse_url(url).netloc + if netloc: + headers_['Host'] = netloc if headers: headers_.update(headers) - return headers_ - def urlopen(self, method, url, **kw): + def urlopen(self, method, url, redirect=True, **kw): "Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute." - kw['assert_same_host'] = False - kw['headers'] = self._set_proxy_headers(url, headers=kw.get('headers')) - return self.proxy_pool.urlopen(method, url, **kw) + u = parse_url(url) + + if u.scheme == "http": + # It's too late to set proxy headers on per-request basis for + # tunnelled HTTPS connections, should use + # constructor's proxy_headers instead. + kw['headers'] = self._set_proxy_headers(url, kw.get('headers', + self.headers)) + kw['headers'].update(self.proxy_headers) + + return super(ProxyManager, self).urlopen(method, url, redirect, **kw) -def proxy_from_url(url, **pool_kw): - proxy_pool = connection_from_url(url, **pool_kw) - return ProxyManager(proxy_pool) +def proxy_from_url(url, **kw): + return ProxyManager(proxy_url=url, **kw) diff --git a/requests/packages/urllib3/request.py b/requests/packages/urllib3/request.py index bf0256e..66a9a0e 100644 --- a/requests/packages/urllib3/request.py +++ b/requests/packages/urllib3/request.py @@ -30,7 +30,7 @@ class RequestMethods(object): in the URL (such as GET, HEAD, DELETE). :meth:`.request_encode_body` is for sending requests whose fields are - encoded in the *body* of the request using multipart or www-orm-urlencoded + encoded in the *body* of the request using multipart or www-form-urlencoded (such as for POST, PUT, PATCH). :meth:`.request` is for making any kind of request, it will look up the diff --git a/requests/packages/urllib3/response.py b/requests/packages/urllib3/response.py index 2fa4078..4efff5a 100644 --- a/requests/packages/urllib3/response.py +++ b/requests/packages/urllib3/response.py @@ -1,5 +1,5 @@ # urllib3/response.py -# Copyright 2008-2012 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) # # This module is part of urllib3 and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php @@ -7,9 +7,11 @@ import logging import zlib +import io from .exceptions import DecodeError from .packages.six import string_types as basestring, binary_type +from .util import is_fp_closed log = logging.getLogger(__name__) @@ -48,7 +50,7 @@ def _get_decoder(mode): return DeflateDecoder() -class HTTPResponse(object): +class HTTPResponse(io.IOBase): """ HTTP Response container. @@ -72,6 +74,7 @@ class HTTPResponse(object): """ CONTENT_DECODERS = ['gzip', 'deflate'] + REDIRECT_STATUSES = [301, 302, 303, 307, 308] def __init__(self, body='', headers=None, status=0, version=0, reason=None, strict=0, preload_content=True, decode_content=True, @@ -105,7 +108,7 @@ class HTTPResponse(object): code and valid location. ``None`` if redirect status and no location. ``False`` if not a redirect status code. """ - if self.status in [301, 302, 303, 307]: + if self.status in self.REDIRECT_STATUSES: return self.headers.get('location') return False @@ -183,11 +186,13 @@ class HTTPResponse(object): try: if decode_content and self._decoder: data = self._decoder.decompress(data) - except (IOError, zlib.error): - raise DecodeError("Received response with content-encoding: %s, but " - "failed to decode it." % content_encoding) + except (IOError, zlib.error) as e: + raise DecodeError( + "Received response with content-encoding: %s, but " + "failed to decode it." % content_encoding, + e) - if flush_decoder and self._decoder: + if flush_decoder and decode_content and self._decoder: buf = self._decoder.decompress(binary_type()) data += buf + self._decoder.flush() @@ -200,6 +205,29 @@ class HTTPResponse(object): if self._original_response and self._original_response.isclosed(): self.release_conn() + def stream(self, amt=2**16, decode_content=None): + """ + A generator wrapper for the read() method. A call will block until + ``amt`` bytes have been read from the connection or until the + connection is closed. + + :param amt: + How much of the content to read. The generator will return up to + much data per iteration, but may return less. This is particularly + likely when using compressed data. However, the empty string will + never be returned. + + :param decode_content: + If True, will attempt to decode the body based on the + 'content-encoding' header. + """ + while not is_fp_closed(self._fp): + data = self.read(amt=amt, decode_content=decode_content) + + if data: + yield data + + @classmethod def from_httplib(ResponseCls, r, **response_kw): """ @@ -239,3 +267,35 @@ class HTTPResponse(object): def getheader(self, name, default=None): return self.headers.get(name, default) + + # Overrides from io.IOBase + def close(self): + if not self.closed: + self._fp.close() + + @property + def closed(self): + if self._fp is None: + return True + elif hasattr(self._fp, 'closed'): + return self._fp.closed + elif hasattr(self._fp, 'isclosed'): # Python 2 + return self._fp.isclosed() + else: + return True + + def fileno(self): + if self._fp is None: + raise IOError("HTTPResponse has no file to get a fileno from") + elif hasattr(self._fp, "fileno"): + return self._fp.fileno() + else: + raise IOError("The file-like object this HTTPResponse is wrapped " + "around has no file descriptor") + + def flush(self): + if self._fp is not None and hasattr(self._fp, 'flush'): + return self._fp.flush() + + def readable(self): + return True diff --git a/requests/packages/urllib3/util.py b/requests/packages/urllib3/util.py index 544f9ed..266c9ed 100644 --- a/requests/packages/urllib3/util.py +++ b/requests/packages/urllib3/util.py @@ -6,10 +6,11 @@ from base64 import b64encode +from binascii import hexlify, unhexlify from collections import namedtuple -from socket import error as SocketError from hashlib import md5, sha1 -from binascii import hexlify, unhexlify +from socket import error as SocketError, _GLOBAL_DEFAULT_TIMEOUT +import time try: from select import poll, POLLIN @@ -31,9 +32,234 @@ try: # Test for SSL features except ImportError: pass - from .packages import six -from .exceptions import LocationParseError, SSLError +from .exceptions import LocationParseError, SSLError, TimeoutStateError + + +_Default = object() +# The default timeout to use for socket connections. This is the attribute used +# by httplib to define the default timeout + + +def current_time(): + """ + Retrieve the current time, this function is mocked out in unit testing. + """ + return time.time() + + +class Timeout(object): + """ + Utility object for storing timeout values. + + Example usage: + + .. code-block:: python + + timeout = urllib3.util.Timeout(connect=2.0, read=7.0) + pool = HTTPConnectionPool('www.google.com', 80, timeout=timeout) + pool.request(...) # Etc, etc + + :param connect: + The maximum amount of time to wait for a connection attempt to a server + to succeed. Omitting the parameter will default the connect timeout to + the system default, probably `the global default timeout in socket.py + `_. + None will set an infinite timeout for connection attempts. + + :type connect: integer, float, or None + + :param read: + The maximum amount of time to wait between consecutive + read operations for a response from the server. Omitting + the parameter will default the read timeout to the system + default, probably `the global default timeout in socket.py + `_. + None will set an infinite timeout. + + :type read: integer, float, or None + + :param total: + The maximum amount of time to wait for an HTTP request to connect and + return. This combines the connect and read timeouts into one. In the + event that both a connect timeout and a total are specified, or a read + timeout and a total are specified, the shorter timeout will be applied. + + Defaults to None. + + + :type total: integer, float, or None + + .. note:: + + Many factors can affect the total amount of time for urllib3 to return + an HTTP response. Specifically, Python's DNS resolver does not obey the + timeout specified on the socket. Other factors that can affect total + request time include high CPU load, high swap, the program running at a + low priority level, or other behaviors. The observed running time for + urllib3 to return a response may be greater than the value passed to + `total`. + + In addition, the read and total timeouts only measure the time between + read operations on the socket connecting the client and the server, not + the total amount of time for the request to return a complete response. + As an example, you may want a request to return within 7 seconds or + fail, so you set the ``total`` timeout to 7 seconds. If the server + sends one byte to you every 5 seconds, the request will **not** trigger + time out. This case is admittedly rare. + """ + + #: A sentinel object representing the default timeout value + DEFAULT_TIMEOUT = _GLOBAL_DEFAULT_TIMEOUT + + def __init__(self, connect=_Default, read=_Default, total=None): + self._connect = self._validate_timeout(connect, 'connect') + self._read = self._validate_timeout(read, 'read') + self.total = self._validate_timeout(total, 'total') + self._start_connect = None + + def __str__(self): + return '%s(connect=%r, read=%r, total=%r)' % ( + type(self).__name__, self._connect, self._read, self.total) + + + @classmethod + def _validate_timeout(cls, value, name): + """ Check that a timeout attribute is valid + + :param value: The timeout value to validate + :param name: The name of the timeout attribute to validate. This is used + for clear error messages + :return: the value + :raises ValueError: if the type is not an integer or a float, or if it + is a numeric value less than zero + """ + if value is _Default: + return cls.DEFAULT_TIMEOUT + + if value is None or value is cls.DEFAULT_TIMEOUT: + return value + + try: + float(value) + except (TypeError, ValueError): + raise ValueError("Timeout value %s was %s, but it must be an " + "int or float." % (name, value)) + + try: + if value < 0: + raise ValueError("Attempted to set %s timeout to %s, but the " + "timeout cannot be set to a value less " + "than 0." % (name, value)) + except TypeError: # Python 3 + raise ValueError("Timeout value %s was %s, but it must be an " + "int or float." % (name, value)) + + return value + + @classmethod + def from_float(cls, timeout): + """ Create a new Timeout from a legacy timeout value. + + The timeout value used by httplib.py sets the same timeout on the + connect(), and recv() socket requests. This creates a :class:`Timeout` + object that sets the individual timeouts to the ``timeout`` value passed + to this function. + + :param timeout: The legacy timeout value + :type timeout: integer, float, sentinel default object, or None + :return: a Timeout object + :rtype: :class:`Timeout` + """ + return Timeout(read=timeout, connect=timeout) + + def clone(self): + """ Create a copy of the timeout object + + Timeout properties are stored per-pool but each request needs a fresh + Timeout object to ensure each one has its own start/stop configured. + + :return: a copy of the timeout object + :rtype: :class:`Timeout` + """ + # We can't use copy.deepcopy because that will also create a new object + # for _GLOBAL_DEFAULT_TIMEOUT, which socket.py uses as a sentinel to + # detect the user default. + return Timeout(connect=self._connect, read=self._read, + total=self.total) + + def start_connect(self): + """ Start the timeout clock, used during a connect() attempt + + :raises urllib3.exceptions.TimeoutStateError: if you attempt + to start a timer that has been started already. + """ + if self._start_connect is not None: + raise TimeoutStateError("Timeout timer has already been started.") + self._start_connect = current_time() + return self._start_connect + + def get_connect_duration(self): + """ Gets the time elapsed since the call to :meth:`start_connect`. + + :return: the elapsed time + :rtype: float + :raises urllib3.exceptions.TimeoutStateError: if you attempt + to get duration for a timer that hasn't been started. + """ + if self._start_connect is None: + raise TimeoutStateError("Can't get connect duration for timer " + "that has not started.") + return current_time() - self._start_connect + + @property + def connect_timeout(self): + """ Get the value to use when setting a connection timeout. + + This will be a positive float or integer, the value None + (never timeout), or the default system timeout. + + :return: the connect timeout + :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None + """ + if self.total is None: + return self._connect + + if self._connect is None or self._connect is self.DEFAULT_TIMEOUT: + return self.total + + return min(self._connect, self.total) + + @property + def read_timeout(self): + """ Get the value for the read timeout. + + This assumes some time has elapsed in the connection timeout and + computes the read timeout appropriately. + + If self.total is set, the read timeout is dependent on the amount of + time taken by the connect timeout. If the connection time has not been + established, a :exc:`~urllib3.exceptions.TimeoutStateError` will be + raised. + + :return: the value to use for the read timeout + :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None + :raises urllib3.exceptions.TimeoutStateError: If :meth:`start_connect` + has not yet been called on this object. + """ + if (self.total is not None and + self.total is not self.DEFAULT_TIMEOUT and + self._read is not None and + self._read is not self.DEFAULT_TIMEOUT): + # in case the connect timeout has not yet been established. + if self._start_connect is None: + return self._read + return max(0, min(self.total - self.get_connect_duration(), + self._read)) + elif self.total is not None and self.total is not self.DEFAULT_TIMEOUT: + return max(0, self.total - self.get_connect_duration()) + else: + return self._read class Url(namedtuple('Url', ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment'])): @@ -61,6 +287,13 @@ class Url(namedtuple('Url', ['scheme', 'auth', 'host', 'port', 'path', 'query', return uri + @property + def netloc(self): + """Network location including host and port""" + if self.port: + return '%s:%d' % (self.host, self.port) + return self.host + def split_first(s, delims): """ @@ -114,7 +347,7 @@ def parse_url(url): # While this code has overlap with stdlib's urlparse, it is much # simplified for our needs and less annoying. - # Additionally, this imeplementations does silly things to be optimal + # Additionally, this implementations does silly things to be optimal # on CPython. scheme = None @@ -143,7 +376,8 @@ def parse_url(url): # IPv6 if url and url[0] == '[': - host, url = url[1:].split(']', 1) + host, url = url.split(']', 1) + host += ']' # Port if ':' in url: @@ -341,6 +575,20 @@ def assert_fingerprint(cert, fingerprint): .format(hexlify(fingerprint_bytes), hexlify(cert_digest))) +def is_fp_closed(obj): + """ + Checks whether a given file-like object is closed. + + :param obj: + The file-like object to check. + """ + if hasattr(obj, 'fp'): + # Object is a container for another file-like object that gets released + # on exhaustion (e.g. HTTPResponse) + return obj.fp is None + + return obj.closed + if SSLContext is not None: # Python 3.2+ def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, diff --git a/requests/sessions.py b/requests/sessions.py index f4aeeee..aa956d3 100644 --- a/requests/sessions.py +++ b/requests/sessions.py @@ -71,15 +71,10 @@ class SessionRedirectMixin(object): """Receives a Response. Returns a generator of Responses.""" i = 0 - prepared_request = PreparedRequest() - prepared_request.body = req.body - prepared_request.headers = req.headers.copy() - prepared_request.hooks = req.hooks - prepared_request.method = req.method - prepared_request.url = req.url # ((resp.status_code is codes.see_other)) while (('location' in resp.headers and resp.status_code in REDIRECT_STATI)): + prepared_request = req.copy() resp.content # Consume socket so it can be released @@ -90,13 +85,18 @@ class SessionRedirectMixin(object): resp.close() url = resp.headers['location'] - method = prepared_request.method + method = req.method # Handle redirection without scheme (see: RFC 1808 Section 4) if url.startswith('//'): parsed_rurl = urlparse(resp.url) url = '%s:%s' % (parsed_rurl.scheme, url) + # The scheme should be lower case... + if '://' in url: + scheme, uri = url.split('://', 1) + url = '%s://%s' % (scheme.lower(), uri) + # Facilitate non-RFC2616-compliant 'location' headers # (e.g. '/path/to/resource' instead of 'http://domain.tld/path/to/resource') # Compliant with RFC3986, we percent encode the url. @@ -109,12 +109,12 @@ class SessionRedirectMixin(object): # http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.3.4 if (resp.status_code == codes.see_other and - prepared_request.method != 'HEAD'): + method != 'HEAD'): method = 'GET' # Do what the browsers do, despite standards... if (resp.status_code in (codes.moved, codes.found) and - prepared_request.method not in ('GET', 'HEAD')): + method not in ('GET', 'HEAD')): method = 'GET' prepared_request.method = method @@ -153,7 +153,7 @@ class SessionRedirectMixin(object): class Session(SessionRedirectMixin): """A Requests session. - Provides cookie persistience, connection-pooling, and configuration. + Provides cookie persistence, connection-pooling, and configuration. Basic Usage:: @@ -208,7 +208,10 @@ class Session(SessionRedirectMixin): #: Should we trust the environment? self.trust_env = True - # Set up a CookieJar to be used by default + #: A CookieJar containing all currently outstanding cookies set on this + #: session. By default it is a + #: :class:`RequestsCookieJar `, but + #: may be any other ``cookielib.CookieJar`` compatible object. self.cookies = cookiejar_from_dict({}) # Default connection adapters. @@ -222,6 +225,46 @@ class Session(SessionRedirectMixin): def __exit__(self, *args): self.close() + def prepare_request(self, request): + """Constructs a :class:`PreparedRequest ` for + transmission and returns it. The :class:`PreparedRequest` has settings + merged from the :class:`Request ` instance and those of the + :class:`Session`. + + :param request: :class:`Request` instance to prepare with this + session's settings. + """ + cookies = request.cookies or {} + + # Bootstrap CookieJar. + if not isinstance(cookies, cookielib.CookieJar): + cookies = cookiejar_from_dict(cookies) + + # Merge with session cookies + merged_cookies = RequestsCookieJar() + merged_cookies.update(self.cookies) + merged_cookies.update(cookies) + + + # Set environment's basic authentication if not explicitly set. + auth = request.auth + if self.trust_env and not auth and not self.auth: + auth = get_netrc_auth(request.url) + + p = PreparedRequest() + p.prepare( + method=request.method.upper(), + url=request.url, + files=request.files, + data=request.data, + headers=merge_setting(request.headers, self.headers, dict_class=CaseInsensitiveDict), + params=merge_setting(request.params, self.params), + auth=merge_setting(auth, self.auth), + cookies=merged_cookies, + hooks=merge_setting(request.hooks, self.hooks), + ) + return p + def request(self, method, url, params=None, data=None, @@ -265,20 +308,22 @@ class Session(SessionRedirectMixin): :param cert: (optional) if String, path to ssl client cert file (.pem). If Tuple, ('cert', 'key') pair. """ + # Create the Request. + req = Request( + method = method.upper(), + url = url, + headers = headers, + files = files, + data = data or {}, + params = params or {}, + auth = auth, + cookies = cookies, + hooks = hooks, + ) + prep = self.prepare_request(req) - cookies = cookies or {} proxies = proxies or {} - # Bootstrap CookieJar. - if not isinstance(cookies, cookielib.CookieJar): - cookies = cookiejar_from_dict(cookies) - - # Merge with session cookies - merged_cookies = RequestsCookieJar() - merged_cookies.update(self.cookies) - merged_cookies.update(cookies) - cookies = merged_cookies - # Gather clues from the surrounding environment. if self.trust_env: # Set environment's proxies. @@ -286,10 +331,6 @@ class Session(SessionRedirectMixin): for (k, v) in env_proxies.items(): proxies.setdefault(k, v) - # Set environment's basic authentication. - if not auth: - auth = get_netrc_auth(url) - # Look for configuration. if not verify and verify is not False: verify = os.environ.get('REQUESTS_CA_BUNDLE') @@ -299,30 +340,11 @@ class Session(SessionRedirectMixin): verify = os.environ.get('CURL_CA_BUNDLE') # Merge all the kwargs. - params = merge_setting(params, self.params) - headers = merge_setting(headers, self.headers, dict_class=CaseInsensitiveDict) - auth = merge_setting(auth, self.auth) proxies = merge_setting(proxies, self.proxies) - hooks = merge_setting(hooks, self.hooks) stream = merge_setting(stream, self.stream) verify = merge_setting(verify, self.verify) cert = merge_setting(cert, self.cert) - # Create the Request. - req = Request() - req.method = method.upper() - req.url = url - req.headers = headers - req.files = files - req.data = data - req.params = params - req.auth = auth - req.cookies = cookies - req.hooks = hooks - - # Prepare the Request. - prep = req.prepare() - # Send the request. send_kwargs = { 'stream': stream, @@ -416,7 +438,7 @@ class Session(SessionRedirectMixin): # It's possible that users might accidentally send a Request object. # Guard against that specific failure case. - if getattr(request, 'prepare', None): + if not isinstance(request, PreparedRequest): raise ValueError('You can only send PreparedRequests.') # Set up variables needed for resolve_redirects and dispatching of @@ -443,6 +465,10 @@ class Session(SessionRedirectMixin): r = dispatch_hook('response', hooks, r, **kwargs) # Persist cookies + if r.history: + # If the hooks create history then we want those cookies too + for resp in r.history: + extract_cookies_to_jar(self.cookies, resp.request, resp.raw) extract_cookies_to_jar(self.cookies, request, r.raw) # Redirect resolving generator. @@ -467,7 +493,7 @@ class Session(SessionRedirectMixin): """Returns the appropriate connnection adapter for the given URL.""" for (prefix, adapter) in self.adapters.items(): - if url.startswith(prefix): + if url.lower().startswith(prefix): return adapter # Nothing matches :-/ @@ -475,7 +501,7 @@ class Session(SessionRedirectMixin): def close(self): """Closes all adapters and as such the session""" - for _, v in self.adapters.items(): + for v in self.adapters.values(): v.close() def mount(self, prefix, adapter): diff --git a/requests/status_codes.py b/requests/status_codes.py index de38486..ed7a866 100644 --- a/requests/status_codes.py +++ b/requests/status_codes.py @@ -18,7 +18,8 @@ _codes = { 205: ('reset_content', 'reset'), 206: ('partial_content', 'partial'), 207: ('multi_status', 'multiple_status', 'multi_stati', 'multiple_stati'), - 208: ('im_used',), + 208: ('already_reported',), + 226: ('im_used',), # Redirection. 300: ('multiple_choices',), diff --git a/requests/structures.py b/requests/structures.py index 8d02ea6..a175913 100644 --- a/requests/structures.py +++ b/requests/structures.py @@ -103,7 +103,7 @@ class CaseInsensitiveDict(collections.MutableMapping): # Copy is required def copy(self): - return CaseInsensitiveDict(self._store.values()) + return CaseInsensitiveDict(self._store.values()) def __repr__(self): return '%s(%r)' % (self.__class__.__name__, dict(self.items())) diff --git a/requests/utils.py b/requests/utils.py index b21bf8f..3ec6131 100644 --- a/requests/utils.py +++ b/requests/utils.py @@ -21,9 +21,11 @@ from netrc import netrc, NetrcParseError from . import __version__ from . import certs from .compat import parse_http_list as _parse_list_header -from .compat import quote, urlparse, bytes, str, OrderedDict, urlunparse +from .compat import (quote, urlparse, bytes, str, OrderedDict, urlunparse, + is_py2, is_py3, builtin_str, getproxies, proxy_bypass) from .cookies import RequestsCookieJar, cookiejar_from_dict from .structures import CaseInsensitiveDict +from .exceptions import MissingSchema, InvalidURL _hush_pyflakes = (RequestsCookieJar,) @@ -264,8 +266,12 @@ def get_encodings_from_content(content): """ charset_re = re.compile(r']', flags=re.I) + pragma_re = re.compile(r']', flags=re.I) + xml_re = re.compile(r'^<\?xml.*?encoding=["\']*(.+?)["\'>]') - return charset_re.findall(content) + return (charset_re.findall(content) + + pragma_re.findall(content) + + xml_re.findall(content)) def get_encoding_from_headers(headers): @@ -301,7 +307,7 @@ def stream_decode_response_unicode(iterator, r): rv = decoder.decode(chunk) if rv: yield rv - rv = decoder.decode('', final=True) + rv = decoder.decode(b'', final=True) if rv: yield rv @@ -361,7 +367,11 @@ def unquote_unreserved(uri): for i in range(1, len(parts)): h = parts[i][0:2] if len(h) == 2 and h.isalnum(): - c = chr(int(h, 16)) + try: + c = chr(int(h, 16)) + except ValueError: + raise InvalidURL("Invalid percent-escape sequence: '%s'" % h) + if c in UNRESERVED_SET: parts[i] = c + parts[i][2:] else: @@ -386,25 +396,17 @@ def requote_uri(uri): def get_environ_proxies(url): """Return a dict of environment proxies.""" - proxy_keys = [ - 'all', - 'http', - 'https', - 'ftp', - 'socks' - ] - get_proxy = lambda k: os.environ.get(k) or os.environ.get(k.upper()) # First check whether no_proxy is defined. If it is, check that the URL # we're getting isn't in the no_proxy list. no_proxy = get_proxy('no_proxy') + netloc = urlparse(url).netloc if no_proxy: # We need to check whether we match here. We need to see if we match # the end of the netloc, both with and without the port. - no_proxy = no_proxy.split(',') - netloc = urlparse(url).netloc + no_proxy = no_proxy.replace(' ', '').split(',') for host in no_proxy: if netloc.endswith(host) or netloc.split(':')[0].endswith(host): @@ -412,10 +414,15 @@ def get_environ_proxies(url): # to apply the proxies on this URL. return {} + # If the system proxy settings indicate that this URL should be bypassed, + # don't proxy. + if proxy_bypass(netloc): + return {} + # If we get here, we either didn't have no_proxy set or we're not going - # anywhere that no_proxy applies to. - proxies = [(key, get_proxy(key + '_proxy')) for key in proxy_keys] - return dict([(key, val) for (key, val) in proxies if val]) + # anywhere that no_proxy applies to, and the system settings don't require + # bypassing the proxy for the current URL. + return getproxies() def default_user_agent(): @@ -526,18 +533,13 @@ def guess_json_utf(data): return None -def prepend_scheme_if_needed(url, new_scheme): - '''Given a URL that may or may not have a scheme, prepend the given scheme. - Does not replace a present scheme with the one provided as an argument.''' - scheme, netloc, path, params, query, fragment = urlparse(url, new_scheme) - - # urlparse is a finicky beast, and sometimes decides that there isn't a - # netloc present. Assume that it's being over-cautious, and switch netloc - # and path if urlparse decided there was no netloc. - if not netloc: - netloc, path = path, netloc +def except_on_missing_scheme(url): + """Given a URL, raise a MissingSchema exception if the scheme is missing. + """ + scheme, netloc, path, params, query, fragment = urlparse(url) - return urlunparse((scheme, netloc, path, params, query, fragment)) + if not scheme: + raise MissingSchema('Proxy URLs must have explicit schemes.') def get_auth_from_url(url): @@ -548,3 +550,22 @@ def get_auth_from_url(url): return (parsed.username, parsed.password) else: return ('', '') + + +def to_native_string(string, encoding='ascii'): + """ + Given a string object, regardless of type, returns a representation of that + string in the native string type, encoding and decoding where necessary. + This assumes ASCII unless told otherwise. + """ + out = None + + if isinstance(string, builtin_str): + out = string + else: + if is_py2: + out = string.encode(encoding) + else: + out = string.decode(encoding) + + return out diff --git a/test_requests.py b/test_requests.py index 4a4831e..d1e6ed0 100755 --- a/test_requests.py +++ b/test_requests.py @@ -10,9 +10,10 @@ import unittest import pickle import requests +import pytest from requests.auth import HTTPDigestAuth from requests.adapters import HTTPAdapter -from requests.compat import str, cookielib +from requests.compat import str, cookielib, getproxies, urljoin, urlparse from requests.cookies import cookiejar_from_dict from requests.exceptions import InvalidURL, MissingSchema from requests.structures import CaseInsensitiveDict @@ -23,11 +24,13 @@ except ImportError: import io as StringIO HTTPBIN = os.environ.get('HTTPBIN_URL', 'http://httpbin.org/') +# Issue #1483: Make sure the URL always has a trailing slash +HTTPBIN = HTTPBIN.rstrip('/') + '/' def httpbin(*suffix): """Returns url for HTTPBIN resource.""" - return HTTPBIN + '/'.join(suffix) + return urljoin(HTTPBIN, '/'.join(suffix)) class RequestsTestCase(unittest.TestCase): @@ -87,9 +90,23 @@ class RequestsTestCase(unittest.TestCase): self.assertEqual(request.url, "http://example.com/path?key=value&a=b#fragment") + def test_mixed_case_scheme_acceptable(self): + s = requests.Session() + s.proxies = getproxies() + parts = urlparse(httpbin('get')) + schemes = ['http://', 'HTTP://', 'hTTp://', 'HttP://', + 'https://', 'HTTPS://', 'hTTps://', 'HttPs://'] + for scheme in schemes: + url = scheme + parts.netloc + parts.path + r = requests.Request('GET', url) + r = s.send(r.prepare()) + self.assertEqual(r.status_code, 200, + "failed for scheme %s" % scheme) + def test_HTTP_200_OK_GET_ALTERNATIVE(self): r = requests.Request('GET', httpbin('get')) s = requests.Session() + s.proxies = getproxies() r = s.send(r.prepare()) @@ -142,6 +159,11 @@ class RequestsTestCase(unittest.TestCase): ) assert 'foo' not in s.cookies + def test_cookie_quote_wrapped(self): + s = requests.session() + s.get(httpbin('cookies/set?foo="bar:baz"')) + self.assertTrue(s.cookies['foo'] == '"bar:baz"') + def test_request_cookie_overrides_session_cookie(self): s = requests.session() s.cookies['foo'] = 'bar' @@ -161,6 +183,12 @@ class RequestsTestCase(unittest.TestCase): # Make sure the session cj is still the custom one assert s.cookies is cj + def test_requests_in_history_are_not_overridden(self): + resp = requests.get(httpbin('redirect/3')) + urls = [r.url for r in resp.history] + req_urls = [r.request.url for r in resp.history] + self.assertEquals(urls, req_urls) + def test_user_agent_transfers(self): heads = { @@ -200,6 +228,34 @@ class RequestsTestCase(unittest.TestCase): r = s.get(url) self.assertEqual(r.status_code, 200) + def test_basicauth_with_netrc(self): + auth = ('user', 'pass') + wrong_auth = ('wronguser', 'wrongpass') + url = httpbin('basic-auth', 'user', 'pass') + + def get_netrc_auth_mock(url): + return auth + requests.sessions.get_netrc_auth = get_netrc_auth_mock + + # Should use netrc and work. + r = requests.get(url) + self.assertEqual(r.status_code, 200) + + # Given auth should override and fail. + r = requests.get(url, auth=wrong_auth) + self.assertEqual(r.status_code, 401) + + s = requests.session() + + # Should use netrc and work. + r = s.get(url) + self.assertEqual(r.status_code, 200) + + # Given auth should override and fail. + s.auth = wrong_auth + r = s.get(url) + self.assertEqual(r.status_code, 401) + def test_DIGEST_HTTP_200_OK_GET(self): auth = HTTPDigestAuth('user', 'pass') @@ -212,10 +268,26 @@ class RequestsTestCase(unittest.TestCase): self.assertEqual(r.status_code, 401) s = requests.session() - s.auth = auth + s.auth = HTTPDigestAuth('user', 'pass') r = s.get(url) self.assertEqual(r.status_code, 200) + def test_DIGEST_AUTH_RETURNS_COOKIE(self): + url = httpbin('digest-auth', 'auth', 'user', 'pass') + auth = HTTPDigestAuth('user', 'pass') + r = requests.get(url) + assert r.cookies['fake'] == 'fake_value' + + r = requests.get(url, auth=auth) + assert r.status_code == 200 + + def test_DIGEST_AUTH_SETS_SESSION_COOKIES(self): + url = httpbin('digest-auth', 'auth', 'user', 'pass') + auth = HTTPDigestAuth('user', 'pass') + s = requests.Session() + s.get(url, auth=auth) + assert s.cookies['fake'] == 'fake_value' + def test_DIGEST_STREAM(self): auth = HTTPDigestAuth('user', 'pass') @@ -284,6 +356,12 @@ class RequestsTestCase(unittest.TestCase): except ValueError: pass + def test_conflicting_post_params(self): + url = httpbin('post') + with open('requirements.txt') as f: + pytest.raises(ValueError, "requests.post(url, data='[{\"some\": \"data\"}]', files={'some': f})") + pytest.raises(ValueError, "requests.post(url, data=u'[{\"some\": \"data\"}]', files={'some': f})") + def test_request_ok_set(self): r = requests.get(httpbin('status', '404')) self.assertEqual(r.ok, False) @@ -380,10 +458,28 @@ class RequestsTestCase(unittest.TestCase): prep = req.prepare() s = requests.Session() + s.proxies = getproxies() resp = s.send(prep) self.assertTrue(hasattr(resp, 'hook_working')) + def test_prepared_from_session(self): + class DummyAuth(requests.auth.AuthBase): + def __call__(self, r): + r.headers['Dummy-Auth-Test'] = 'dummy-auth-test-ok' + return r + + req = requests.Request('GET', httpbin('headers')) + self.assertEqual(req.auth, None) + + s = requests.Session() + s.auth = DummyAuth() + + prep = s.prepare_request(req) + resp = s.send(prep) + + self.assertTrue(resp.json()['headers']['Dummy-Auth-Test'], 'dummy-auth-test-ok') + def test_links(self): r = requests.Response() r.headers = { @@ -469,6 +565,7 @@ class RequestsTestCase(unittest.TestCase): s = requests.Session() s = pickle.loads(pickle.dumps(s)) + s.proxies = getproxies() r = s.send(r.prepare()) self.assertEqual(r.status_code, 200) @@ -482,20 +579,26 @@ class RequestsTestCase(unittest.TestCase): s.headers.update({'accept': 'application/json'}) r = s.get(httpbin('get')) headers = r.request.headers - # ASCII encode because of key comparison changes in py3 self.assertEqual( - headers['accept'.encode('ascii')], + headers['accept'], 'application/json' ) self.assertEqual( - headers['Accept'.encode('ascii')], + headers['Accept'], 'application/json' ) self.assertEqual( - headers['ACCEPT'.encode('ascii')], + headers['ACCEPT'], 'application/json' ) + def test_uppercase_scheme_redirect(self): + parts = urlparse(httpbin('html')) + url = "HTTP://" + parts.netloc + parts.path + r = requests.get(httpbin('redirect-to'), params={'url': url}) + self.assertEqual(r.status_code, 200) + self.assertEqual(r.url.lower(), url.lower()) + def test_transport_adapter_ordering(self): s = requests.Session() order = ['https://', 'http://'] @@ -557,6 +660,68 @@ class RequestsTestCase(unittest.TestCase): r = requests.Request('GET', url).prepare() self.assertEqual(r.url, url) + def test_header_keys_are_native(self): + headers = {u'unicode': 'blah', 'byte'.encode('ascii'): 'blah'} + r = requests.Request('GET', httpbin('get'), headers=headers) + p = r.prepare() + + # This is testing that they are builtin strings. A bit weird, but there + # we go. + self.assertTrue('unicode' in p.headers.keys()) + self.assertTrue('byte' in p.headers.keys()) + + def test_can_send_nonstring_objects_with_files(self): + data = {'a': 0.0} + files = {'b': 'foo'} + r = requests.Request('POST', httpbin('post'), data=data, files=files) + p = r.prepare() + + self.assertTrue('multipart/form-data' in p.headers['Content-Type']) + + +class TestContentEncodingDetection(unittest.TestCase): + + def test_none(self): + encodings = requests.utils.get_encodings_from_content('') + self.assertEqual(len(encodings), 0) + + def test_html_charset(self): + """HTML5 meta charset attribute""" + content = '' + encodings = requests.utils.get_encodings_from_content(content) + self.assertEqual(len(encodings), 1) + self.assertEqual(encodings[0], 'UTF-8') + + def test_html4_pragma(self): + """HTML4 pragma directive""" + content = '' + encodings = requests.utils.get_encodings_from_content(content) + self.assertEqual(len(encodings), 1) + self.assertEqual(encodings[0], 'UTF-8') + + def test_xhtml_pragma(self): + """XHTML 1.x served with text/html MIME type""" + content = '' + encodings = requests.utils.get_encodings_from_content(content) + self.assertEqual(len(encodings), 1) + self.assertEqual(encodings[0], 'UTF-8') + + def test_xml(self): + """XHTML 1.x served as XML""" + content = '' + encodings = requests.utils.get_encodings_from_content(content) + self.assertEqual(len(encodings), 1) + self.assertEqual(encodings[0], 'UTF-8') + + def test_precedence(self): + content = ''' + + + + '''.strip() + encodings = requests.utils.get_encodings_from_content(content) + self.assertEqual(encodings, ['HTML5', 'HTML4', 'XML']) + class TestCaseInsensitiveDict(unittest.TestCase): -- cgit v1.2.3